# Install necessary libraries
!pip install -q yfinance
!pip install pandas-datareader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
import streamlit as st
[notice] A new release of pip is available: 23.2.1 -> 24.1.2 [notice] To update, run: python.exe -m pip install --upgrade pip [notice] A new release of pip is available: 23.2.1 -> 24.1.2 [notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: pandas-datareader in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (0.10.0) Requirement already satisfied: lxml in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (5.2.2) Requirement already satisfied: pandas>=0.23 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (2.0.1) Requirement already satisfied: requests>=2.19.0 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (2.31.0) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2024.1) Requirement already satisfied: tzdata>=2022.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2024.1) Requirement already satisfied: numpy>=1.21.0 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (1.23.5) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (3.6) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (2.2.1) Requirement already satisfied: certifi>=2017.4.17 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (2024.2.2) Requirement already satisfied: six>=1.5 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from python-dateutil>=2.8.2->pandas>=0.23->pandas-datareader) (1.16.0)
# Load/Read Data
yf.pdr_override()
# Set plotting styles
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
# Define company tickers
tech_list = ['NVDA']
# Download stock data for the past year
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)
company_list = []
for stock in tech_list:
company_list.append(yf.download(stock, start=start, end=end))
company_name = ["NVIDIA"]
yfinance: pandas_datareader support is deprecated & semi-broken so will be removed in a future verison. Just use yfinance.
[*********************100%%**********************] 1 of 1 completed
# Fill missing values using forward fill
for company in company_list:
company.ffill(inplace=True)
# Ensure consistent date format
for company in company_list:
company.reset_index(inplace=True)
company['Date'] = pd.to_datetime(company['Date'])
company.set_index('Date', inplace=True)
# Add company name column to each dataframe
for company, com_name in zip(company_list, company_name):
company["company_name"] = com_name
# Concatenate individual stock data into a single DataFrame
df = pd.concat(company_list, axis=0)
# Shuffle the data and get a random sample of the last 10 rows
df = df.sample(frac=1).reset_index(drop=True)
print(df.tail(10))
df = df.reset_index()
df = df.fillna(method='ffill')
Open High Low Close Adj Close Volume
1750 49.680000 49.834000 49.040001 49.655998 49.645226 416954000 \
1751 13.151250 13.159250 12.636750 12.643000 12.617294 245840000
1752 27.826000 28.754999 27.731001 28.680000 28.668447 361494000
1753 5.950000 5.993750 5.893750 5.908750 5.845786 409884000
1754 14.801000 14.891000 14.216000 14.602000 14.589908 595292000
1755 22.097000 22.711000 22.083000 22.691999 22.655045 187590000
1756 14.075000 14.084500 13.758000 13.838750 13.801771 378900000
1757 59.570000 59.910000 58.584999 59.873001 59.865276 294654000
1758 5.715000 5.728250 5.462250 5.534500 5.478926 701240000
1759 33.516998 34.646999 31.900000 31.955999 31.903963 754335000
company_name
1750 NVIDIA
1751 NVIDIA
1752 NVIDIA
1753 NVIDIA
1754 NVIDIA
1755 NVIDIA
1756 NVIDIA
1757 NVIDIA
1758 NVIDIA
1759 NVIDIA
# Plotting closing prices
plt.figure(figsize=(15, 10))
plt.subplots_adjust(top=1.25, bottom=1.2)
for i, company in enumerate(company_list, 1):
plt.subplot(2, 2, i)
company['Adj Close'].plot()
plt.ylabel('Adj Close')
plt.title(f"Closing Price of {tech_list[i - 1]}")
plt.tight_layout()
# Plotting sales volume
plt.figure(figsize=(15, 10))
plt.subplots_adjust(top=1.25, bottom=1.2)
for i, company in enumerate(company_list, 1):
plt.subplot(2, 2, i)
company['Volume'].plot()
plt.ylabel('Volume')
plt.title(f"Sales Volume for {tech_list[i - 1]}")
plt.tight_layout()
ma_day = [10, 20, 50]
# Calculate moving averages
ma_day = [10, 20, 50]
for company_data in company_list:
for ma in ma_day:
column_name = f"MA for {ma} days"
company_data[column_name] = company_data['Adj Close'].rolling(ma).mean()
# plt.figure(figsize=(15, 10))
# plt.subplots_adjust(top=1.25, bottom=1.2)
for i, company_data in enumerate(company_list, 1):
# plt.subplot(2, 2, i)
company_data[['Adj Close', f"MA for {ma_day[0]} days", f"MA for {ma_day[1]} days", f"MA for {ma_day[2]} days"]].plot()
plt.title(f"Moving Averages for {tech_list[i-1]}")
plt.tight_layout()
plt.show()
# Calculate daily returns
for company_data in company_list:
company_data['Daily Return'] = company_data['Adj Close'].pct_change()
# Plotting daily returns
plt.figure(figsize=(15, 10))
company_list[0]['Daily Return'].plot(legend=True, linestyle='--', marker='o')
plt.title(f"Daily Return of {tech_list[0]}")
plt.tight_layout()
plt.show()
# Günlük getirilerin dağılımını çizme
plt.figure(figsize=(12, 9))
company_data = company_list[0]
company_data['Daily Return'].hist(bins=50)
plt.xlabel('Daily Return')
plt.title(f'Distribution of Daily Return for {tech_list[0]}')
plt.tight_layout()
plt.show()
# Tek bir şirket için histogram oluşturma
plt.figure(figsize=(12, 6))
plt.hist(company_data['Daily Return'], bins=20, alpha=0.7, label=tech_list[0])
plt.xlabel('Daily Return (%)')
plt.ylabel('Frequency')
plt.title(f'Distribution of Daily Return for {tech_list[0]} (Past Year)')
plt.legend()
plt.tight_layout()
plt.show()
# NaN değerlerini kaldırma
df_cleaned = df.dropna()
# Pairplot for all numeric columns
sns.pairplot(df_cleaned)
plt.show()
# Autocorrelation Function (ACF)
plt.figure(figsize=(12, 6))
plot_acf(df_cleaned['Adj Close'], lags=320)
plt.title('ACF of Adj Close')
plt.show()
# Differenced Autocorrelation Function
plt.figure(figsize=(12, 6))
plot_acf(df_cleaned['Adj Close'].diff().dropna(), lags=40)
plt.title('Differenced ACF of Adj Close')
plt.show()
# Linear Regression
X = df_cleaned[['Adj Close']]
y = df_cleaned['Open']
model = LinearRegression()
model.fit(X, y)
predictions = model.predict(X)
mse = mean_squared_error(y, predictions)
plt.figure(figsize=(12, 6))
plt.scatter(X, y, color='blue', label='Data Points')
plt.plot(X, predictions, color='red', label='Linear Fit')
plt.xlabel('Adj Close')
plt.ylabel('Open')
plt.title('Linear Regression')
plt.legend()
plt.show()
# Korelasyon matrisini hesaplamadan önce tarih sütununu çıkarın
numeric_data = df_cleaned.select_dtypes(include=[np.number])
# Correlation Matrix Heatmap
cor_matrix = numeric_data.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(cor_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix Heatmap')
plt.show()
# Korelasyon matrisini yazdırma
print("Correlation Matrix:")
print(cor_matrix)
<Figure size 1200x600 with 0 Axes>
<Figure size 1200x600 with 0 Axes>
Correlation Matrix:
Open High Low Close Adj Close Volume
Open 1.000000 0.998691 0.998613 0.996732 0.996731 -0.059724
High 0.998691 1.000000 0.998409 0.998528 0.998534 -0.038770
Low 0.998613 0.998409 1.000000 0.998572 0.998567 -0.075575
Close 0.996732 0.998528 0.998572 1.000000 0.999999 -0.055379
Adj Close 0.996731 0.998534 0.998567 0.999999 1.000000 -0.055140
Volume -0.059724 -0.038770 -0.075575 -0.055379 -0.055140 1.000000
The correlation matrix shows the relationship between each pair of numerical variables in your dataset. Here's how to interpret the results:
Based on this correlation matrix, we can conclude that price variables are closely related to each other, while volume has a weak negative relationship with these prices. This information can guide further analyses or modeling efforts by highlighting which variables are most interrelated.
# Data cleaning
df_cleaned = df.dropna()
# Ensure the dataframe has the necessary number of rows for analysis
min_rows = df_cleaned.shape[0]
df_cleaned = df_cleaned.iloc[:min_rows]
# Print the cleaned dataframe to verify
print(df_cleaned.head())
Open High Low Close Adj Close Volume Date 2020-01-02 5.96875 5.99775 5.91800 5.99775 5.973633 237536000 2020-01-03 5.87750 5.94575 5.85250 5.90175 5.878019 205384000 2020-01-06 5.80800 5.93175 5.78175 5.92650 5.902669 262636000 2020-01-07 5.95500 6.04425 5.90975 5.99825 5.974131 314856000 2020-01-08 5.99400 6.05100 5.95375 6.00950 5.985336 277108000
from sklearn.model_selection import train_test_split
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Plotting training and testing distributions
plt.figure(figsize=(12, 6))
plt.hist(y_train, bins=30, color='blue', alpha=0.7, label='y_train')
plt.title('Distribution of y_train')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()
plt.figure(figsize=(12, 6))
plt.hist(y_test, bins=30, color='red', alpha=0.7, label='y_test')
plt.title('Distribution of y_test')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()
def prepare_data(selected_stock, start_date, end_date):
stock_data = yf.Ticker(selected_stock)
df = stock_data.history(start=start_date, end=end_date)
return df
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import time
import matplotlib.dates as mdates
# Train and evaluate model function
def train_and_evaluate_model(df, company_name, ticker):
# X = df[['Open', 'High', 'Low', 'Close', 'Volume']]
# y = df['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Random Forest Regressor
rf = RandomForestRegressor()
param_grid = {'n_estimators': [100, 200], 'max_depth': [10, 20, None]}
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
start_time_rf = time.time()
grid_search.fit(X_train_scaled, y_train)
end_time_rf = time.time()
best_rf = grid_search.best_estimator_
y_pred_rf = best_rf.predict(X_test_scaled)
# Gradient Boosting Regressor
gbr = GradientBoostingRegressor()
start_time_gbr = time.time()
gbr.fit(X_train_scaled, y_train)
end_time_gbr = time.time()
y_pred_gbr = gbr.predict(X_test_scaled)
# mse_rf = mean_squared_error(y_test, y_pred_rf)
# mae_rf = mean_absolute_error(y_test, y_pred_rf)
# mse_gbr = mean_squared_error(y_test, y_pred_gbr)
# mae_gbr = mean_absolute_error(y_test, y_pred_gbr)
# print(f"{company_name} ({ticker}):")
# print(f"Random Forest - MSE: {mse_rf}, MAE: {mae_rf}")
# print(f"Gradient Boosting - MSE: {mse_gbr}, MAE: {mae_gbr}")
# Plot Actual vs Predicted Prices
# fig, ax = plt.subplots(figsize=(12, 6))
# ax.plot(df.index[-len(y_test):], y_test.values, label='Actual Prices', color='blue', alpha=0.7)
# ax.plot(df.index[-len(y_test):], y_pred_rf, label='Predicted Prices (RF)', color='green', linestyle='--', alpha=0.7)
# ax.plot(df.index[-len(y_test):], y_pred_gbr, label='Predicted Prices (GBR)', color='red', linestyle='--', alpha=0.7)
# ax.legend()
# ax.set_title(f'Actual vs Predicted Stock Prices for {company_name} ({ticker})')
# ax.set_xlabel('Date')
# ax.set_ylabel('Price')
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()
mse_rf = mean_squared_error(y_test, y_pred_rf)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)
mape_rf = mean_absolute_percentage_error(y_test, y_pred_rf)
# Metrics for Gradient Boosting
mse_gbr = mean_squared_error(y_test, y_pred_gbr)
mae_gbr = mean_absolute_error(y_test, y_pred_gbr)
r2_gbr = r2_score(y_test, y_pred_gbr)
mape_gbr = mean_absolute_percentage_error(y_test, y_pred_gbr)
print(f"{company_name}:")
print(f"Random Forest - MSE: {mse_rf}, MAE: {mae_rf}, R²: {r2_rf}, MAPE: {mape_rf}")
print(f"Gradient Boosting - MSE: {mse_gbr}, MAE: {mae_gbr}, R²: {r2_gbr}, MAPE: {mape_gbr}")
# Print training times
print(f"Random Forest training time: {end_time_rf - start_time_rf} seconds")
print(f"Gradient Boosting training time: {end_time_gbr - start_time_gbr} seconds")
return y_test, y_pred_rf, y_pred_gbr
# Function to plot actual vs predicted prices
def plot_actual_vs_predicted(df, y_test, y_pred_rf, y_pred_gbr, company_name, ticker):
fig, axs = plt.subplots(2, 1, figsize=(12, 12), sharex=True)
# Plot Actual vs Predicted Prices (Random Forest)
axs[0].plot(df.index[-len(y_test):], y_test.values, label='Actual Prices', color='blue', alpha=0.7)
axs[0].plot(df.index[-len(y_test):], y_pred_rf, label='Predicted Prices (RF)', color='green', linestyle='--', alpha=0.7)
axs[0].legend()
axs[0].set_title(f'Actual vs Predicted Stock Prices (Random Forest) for {company_name} ({ticker})')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('Price')
axs[0].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
axs[0].tick_params(axis='x', rotation=45)
# Plot Actual vs Predicted Prices (Gradient Boosting)
axs[1].plot(df.index[-len(y_test):], y_test.values, label='Actual Prices', color='blue', alpha=0.7)
axs[1].plot(df.index[-len(y_test):], y_pred_gbr, label='Predicted Prices (GBR)', color='red', linestyle='--', alpha=0.7)
axs[1].legend()
axs[1].set_title(f'Actual vs Predicted Stock Prices (Gradient Boosting) for {company_name} ({ticker})')
axs[1].set_xlabel('Date')
axs[1].set_ylabel('Price')
axs[1].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
axs[1].tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
# Modeli eğit ve değerlendir
y_test, y_pred_rf, y_pred_gbr = train_and_evaluate_model(df_cleaned, "NVIDIA", "NVDA")
# Plot actual vs predicted prices
plot_actual_vs_predicted(df_cleaned, y_test, y_pred_rf, y_pred_gbr, "NVIDIA", "NVDA")
NVIDIA: Random Forest - MSE: 0.3515050336378614, MAE: 0.42839913101436555, R²: 0.9924134272466523, MAPE: 0.02791340047499731 Gradient Boosting - MSE: 0.3050026323469844, MAE: 0.4008051535904705, R²: 0.9934170938142328, MAPE: 0.026271900815984406 Random Forest training time: 8.335803508758545 seconds Gradient Boosting training time: 0.07370281219482422 seconds
Random Forest:
Gradient Boosting:
Mean Squared Error (MSE):
Mean Absolute Error (MAE):
R² (R-squared):
Mean Absolute Percentage Error (MAPE):
Training Time:
# def display_predicted_prices(selected_stock, df):
# data = df.filter(['Close'])
# dataset = data.values
# training_data_len = int(np.ceil(len(dataset) * .95))
# scaler = MinMaxScaler(feature_range=(0, 1))
# scaled_data = scaler.fit_transform(dataset)
# train_data = scaled_data[:training_data_len, :]
# x_train, y_train = [], []
# for i in range(60, len(train_data)):
# x_train.append(train_data[i-60:i, 0])
# y_train.append(train_data[i, 0])
# x_train, y_train = np.array(x_train), np.array(y_train)
# x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# #
# model = Sequential()
# model.add(LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)))
# model.add(LSTM(64, return_sequences=False))
# model.add(Dense(25))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mean_squared_error')
# history = model.fit(x_train, y_train, batch_size=1, epochs=1, verbose=0)
# # Print the history keys and losses to verify training
# print(f'{selected_stock} Training History:')
# print(history.history.keys())
# print(history.history['loss'])
# test_data = scaled_data[training_data_len - 60:, :]
# x_test = []
# for i in range(60, len(test_data)):
# x_test.append(test_data[i-60:i, 0])
# x_test = np.array(x_test)
# x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
# predictions = model.predict(x_test)
# predictions = scaler.inverse_transform(predictions)
# # Generate prediction dates
# prediction_dates = pd.date_range(end=df.index[-1], periods=len(predictions) + 1, freq='B')[1:]
# # Plot with Plotly
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Price', line=dict(color='cyan')))
# fig.add_trace(go.Scatter(x=prediction_dates, y=predictions.flatten(), mode='lines', name='Predicted Price', line=dict(color='magenta')))
# fig.update_layout(title=f'{selected_stock} Predicted Prices',
# xaxis_title='Date',
# yaxis_title='Price',
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'))
# fig.show()
# # Plot the training loss
# plt.figure(figsize=(10, 6))
# plt.plot(history.history['loss'], label='Training Loss', color='cyan')
# plt.title('LSTM Training Loss', color='white')
# plt.xlabel('Epoch', color='white')
# plt.ylabel('Loss', color='white')
# plt.legend()
# plt.show()
# return df['Close'].iloc[-len(predictions):].values, predictions.flatten()
# # Run the prediction and display for each stock in company_list
# for company, name, ticker in zip(company_list, company_name, tech_list):
# y_test_actual, predictions = display_predicted_prices(ticker, df_cleaned)
# # Assuming df is the last company's data
# df = company_list[-1] # Adjust if needed
# y_test_actual, predictions = display_predicted_prices(tech_list[-1], df_cleaned)
c:\Users\Admin\Documents\MLAI\venv\venv\tensorflow_cpu\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
NVDA Training History: dict_keys(['loss']) [0.0036332951858639717] WARNING:tensorflow:5 out of the last 5 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x000002E67417F920> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/2 ━━━━━━━━━━━━━━━━━━━━ 0s 527ms/stepWARNING:tensorflow:6 out of the last 6 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x000002E67417F920> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 897ms/step
c:\Users\Admin\Documents\MLAI\venv\venv\tensorflow_cpu\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
NVDA Training History: dict_keys(['loss']) [0.003994781989604235] 2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 512ms/step
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
# def evaluate_lstm_performance(y_test_actual, y_test_pred):
# mse_lstm = mean_squared_error(y_test_actual, y_test_pred)
# mae_lstm = mean_absolute_error(y_test_actual, y_test_pred)
# r2_lstm = r2_score(y_test_actual, y_test_pred)
# mape_lstm = mean_absolute_percentage_error(y_test_actual, y_test_pred)
# # Print metrics
# print("LSTM Performance:")
# print(f"MSE: {mse_lstm:.4f}")
# print(f"MAE: {mae_lstm:.4f}")
# print(f"R²: {r2_lstm:.4f}")
# print(f"MAPE: {mape_lstm:.4f}")
# # Example usage
# evaluate_lstm_performance(y_test_actual, predictions)
LSTM Performance: MSE: 1.6707 MAE: 1.0565 R²: -0.6278 MAPE: 0.0679
# # Define the function for preparing the data
# def prepare_data(data, n_steps):
# x, y = [], []
# for i in range(len(data) - n_steps):
# x.append(data[i:(i + n_steps), 0])
# y.append(data[i + n_steps, 0])
# return np.array(x), np.array(y)
# # Define the function to create and compile an LSTM model
# def create_lstm_model(input_shape):
# model = Sequential()
# model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
# model.add(LSTM(units=50))
# model.add(Dense(units=1))
# model.compile(optimizer='adam', loss='mean_squared_error')
# return model
# # Load the cleaned data
# # df_cleaned should be a pandas DataFrame with at least a 'Close' column
# # Example:
# # df_cleaned = pd.read_csv('path_to_your_cleaned_data.csv')
# # For demonstration purposes, we'll use dummy data:
# dates = pd.date_range(start='2015-01-01', periods=100, freq='D')
# closing_prices = np.sin(np.linspace(0, 20, 100)) + np.random.normal(0, 0.1, 100)
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)
# # Extract closing prices
# closing_prices = df_cleaned['Close'].values
# # Scale the closing prices
# scaler = MinMaxScaler(feature_range=(0, 1))
# closing_prices_scaled = scaler.fit_transform(closing_prices.reshape(-1, 1))
# # Prepare the training data
# n_steps = 60
# x_train, y_train = prepare_data(closing_prices_scaled, n_steps)
# x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# # Create an instance of the LSTM model
# model = create_lstm_model((x_train.shape[1], 1))
# # Train the model
# model.fit(x_train, y_train, epochs=10, batch_size=32)
# # Generate predictions from the training set
# train_predictions = model.predict(x_train)
# train_predictions = scaler.inverse_transform(train_predictions) # Reverse scaling
# # Get the actual closing prices for plotting
# actual_prices = scaler.inverse_transform(closing_prices_scaled)
# # Plot actual vs predicted prices
# plt.figure(figsize=(12, 6))
# plt.plot(df_cleaned.index[n_steps:], actual_prices[n_steps:], label='Actual Prices', color='blue')
# plt.plot(df_cleaned.index[n_steps:], train_predictions, label='Predicted Prices', color='red')
# plt.title('Stock Price Prediction using LSTM')
# plt.xlabel('Date')
# plt.ylabel('Stock Price (USD)')
# plt.legend()
# plt.show()
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import MinMaxScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dropout, Dense
# from sklearn.metrics import mean_absolute_error as mae
# # Define the function for preparing the data
# def prepare_data(data, n_steps):
# x, y = [], []
# for i in range(len(data) - n_steps):
# x.append(data[i:(i + n_steps), 0])
# y.append(data[i + n_steps, 0])
# return np.array(x), np.array(y)
# # For demonstration purposes, we'll use dummy data:
# dates = pd.date_range(start='2024-01-01', periods=100, freq='D')
# closing_prices = np.sin(np.linspace(0, 20, 100)) + np.random.normal(0, 0.1, 100)
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)
# # Extract and scale the closing prices
# closing_prices = df_cleaned['Close'].values
# # MinMax Scaling
# min_max_scaler = MinMaxScaler(feature_range=(0, 1))
# closing_prices_scaled = min_max_scaler.fit_transform(closing_prices.reshape(-1, 1))
# # Determine appropriate n_past value based on the length of the data
# data_length = len(closing_prices_scaled)
# n_past = 60 # Set your desired n_past value
# # Check if the split is large enough
# if data_length <= n_past:
# print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
# print(f"Total number of samples: {data_length}")
# # You can either reduce n_past or proceed with the available data
# # For example, reducing n_past to the minimum length of available data:
# n_past = data_length - 1
# print(f"Adjusting n_past to: {n_past}")
# # Prepare the data for LSTM
# x, y = prepare_data(closing_prices_scaled, n_past)
# x = np.reshape(x, (x.shape[0], x.shape[1], 1))
# # Split the data into training and testing sets
# split = int(len(x) * 0.8)
# x_train, x_test = x[:split], x[split:]
# y_train, y_test = y[:split], y[split:]
# # Check if the split is large enough
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# if len(x_train) <= n_past or len(x_test) <= n_past:
# print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# # Define and compile the LSTM model
# num_feature = 1
# model = Sequential()
# model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(400, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(200, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(100, return_sequences=False))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mse')
# model.summary()
# # Train the model directly on the prepared data
# history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)
# # Plot training and validation loss
# plt.figure(figsize=(12, 6))
# plt.plot(history.history['loss'], label='Training loss')
# plt.plot(history.history['val_loss'], label='Validation loss')
# plt.legend()
# plt.title('LSTM Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.show()
# # Generate predictions
# predictions = model.predict(x_test)
# # Reverse scaling of predictions
# predictions = min_max_scaler.inverse_transform(predictions)
# # Prepare data for plotting
# df_pred = pd.DataFrame(predictions, columns=['Predicted'])
# df_pred.index = df_cleaned.index[-len(predictions):]
# df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
# df_final['Predicted'] = df_pred['Predicted']
# # Plot actual vs predicted values
# plt.figure(figsize=(15, 12))
# plt.plot(df_final['Close'], label='Actual Prices')
# plt.plot(df_final['Predicted'], label='Predicted Prices')
# plt.legend(loc="upper right")
# plt.title('LSTM Stock Price Prediction')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.show()
# # Calculate RMSE and MAE
# rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
# mae_value = mae(df_final['Predicted'], df_final['Close'])
# print(f"Root Mean Square Error (RMSE): {rmse}")
# print(f"Mean Absolute Error (MAE): {mae_value}")
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import RobustScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dropout, Dense
# from sklearn.metrics import mean_absolute_error as mae
# # Define the function for preparing the data
# def prepare_data(data, n_steps):
# x, y = [], []
# for i in range(len(data) - n_steps):
# x.append(data[i:(i + n_steps), 0])
# y.append(data[i + n_steps, 0])
# return np.array(x), np.array(y)
# # Create a date range from January 1, 2015 to December 31, 2025
# dates = pd.date_range(start='2024-05-01', periods=100, freq='D')
# # Generate closing prices with the correct length
# closing_prices = np.sin(np.linspace(0, 20, len(dates))) + np.random.normal(0, 0.1, len(dates))
# # Create a DataFrame with the generated data
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)
# # Extract and scale the closing prices
# closing_prices = df_cleaned['Close'].values
# # Robust Scaling
# robust_scaler = RobustScaler()
# closing_prices_scaled = robust_scaler.fit_transform(closing_prices.reshape(-1, 1))
# # Determine appropriate n_past value based on the length of the data
# data_length = len(closing_prices_scaled)
# n_past = 60 # Set your desired n_past value
# # Check if the split is large enough
# if data_length <= n_past:
# print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
# print(f"Total number of samples: {data_length}")
# # You can either reduce n_past or proceed with the available data
# # For example, reducing n_past to the minimum length of available data:
# n_past = data_length - 1
# print(f"Adjusting n_past to: {n_past}")
# # Prepare the data for LSTM
# x, y = prepare_data(closing_prices_scaled, n_past)
# x = np.reshape(x, (x.shape[0], x.shape[1], 1))
# # Split the data into training and testing sets
# split = int(len(x) * 0.8)
# x_train, x_test = x[:split], x[split:]
# y_train, y_test = y[:split], y[split:]
# # Check if the split is large enough
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# if len(x_train) <= n_past or len(x_test) <= n_past:
# print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# # Define and compile the LSTM model
# num_feature = 1
# model = Sequential()
# model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(400, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(200, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(100, return_sequences=False))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mse')
# model.summary()
# # Train the model directly on the prepared data
# history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)
# # Plot training and validation loss
# plt.figure(figsize=(12, 6))
# plt.plot(history.history['loss'], label='Training loss')
# plt.plot(history.history['val_loss'], label='Validation loss')
# plt.legend()
# plt.title('LSTM Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.show()
# # Generate predictions
# predictions = model.predict(x_test)
# # Reverse scaling of predictions
# predictions = robust_scaler.inverse_transform(predictions)
# # Prepare data for plotting
# df_pred = pd.DataFrame(predictions, columns=['Predicted'])
# df_pred.index = df_cleaned.index[-len(predictions):]
# df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
# df_final['Predicted'] = df_pred['Predicted']
# # Plot actual vs predicted values
# plt.figure(figsize=(15, 12))
# plt.plot(df_final['Close'], label='Actual Prices')
# plt.plot(df_final['Predicted'], label='Predicted Prices')
# plt.legend(loc="upper right")
# plt.title('LSTM Stock Price Prediction')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.show()
# # Calculate RMSE and MAE
# rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
# mae_value = mae(df_final['Predicted'], df_final['Close'])
# print(f"Root Mean Square Error (RMSE): {rmse}")
# print(f"Mean Absolute Error (MAE): {mae_value}")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from sklearn.metrics import mean_absolute_error as mae
# Define the function for preparing the data
def prepare_data(data, n_steps):
x, y = [], []
for i in range(len(data) - n_steps):
x.append(data[i:(i + n_steps), 0])
y.append(data[i + n_steps, 0])
return np.array(x), np.array(y)
# Create a date range from July 1, 2024 to September 30, 2024
future_dates = pd.date_range(start='2024-05-01', periods= 100, freq='D')
# Generate closing prices with the correct length
closing_prices = np.sin(np.linspace(0, 20, len(future_dates))) + np.random.normal(0, 0.1, len(future_dates))
# Create a DataFrame with the generated data
df_cleaned = pd.DataFrame({'Date': future_dates, 'Close': closing_prices})
df_cleaned.set_index('Date', inplace=True)
# Extract and scale the closing prices
closing_prices = df_cleaned['Close'].values.reshape(-1, 1)
# Robust Scaling
robust_scaler = RobustScaler()
closing_prices_scaled = robust_scaler.fit_transform(closing_prices)
# Determine appropriate n_past value based on the length of the data
data_length = len(closing_prices_scaled)
n_past = 60 # Set your desired n_past value
# Check if the split is large enough
if data_length <= n_past:
print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
print(f"Total number of samples: {data_length}")
# You can either reduce n_past or proceed with the available data
n_past = data_length - 1
print(f"Adjusting n_past to: {n_past}")
# Prepare the data for LSTM
x, y = prepare_data(closing_prices_scaled, n_past)
x = np.reshape(x, (x.shape[0], x.shape[1], 1))
# Split the data into training and testing sets
split = int(len(x) * 0.8)
x_train, x_test = x[:split], x[split:]
y_train, y_test = y[:split], y[split:]
# Check if the split is large enough
print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
if len(x_train) <= n_past or len(x_test) <= n_past:
print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# Define and compile the LSTM model
num_feature = 1
model = Sequential()
model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(400, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(200, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()
# Track training start time
start_time_lstm = time.time()
# Train the model directly on the prepared data
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)
# Track training end time
end_time_lstm = time.time()
# Plot training and validation loss
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.legend()
plt.title('LSTM Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()
# Generate predictions
predictions = model.predict(x_test)
# Reverse scaling of predictions
predictions = robust_scaler.inverse_transform(predictions)
# Prepare data for plotting
df_pred = pd.DataFrame(predictions, columns=['Predicted'])
df_pred.index = df_cleaned.index[-len(predictions):]
df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
df_final['Predicted'] = df_pred['Predicted']
# Plot actual vs predicted values
plt.figure(figsize=(15, 12))
plt.plot(df_final['Close'], label='Actual Prices')
plt.plot(df_final['Predicted'], label='Predicted Prices')
plt.legend(loc="upper right")
plt.title('LSTM Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()
# Calculate RMSE and MAE
rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
mae_value = mae(df_final['Predicted'], df_final['Close'])
print(f"Root Mean Square Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae_value}")
# Calculate RMSE and MAE for LSTM
rmse_lstm = np.sqrt(mean_squared_error(df_final['Close'], df_final['Predicted']))
mae_lstm = mean_absolute_error(df_final['Close'], df_final['Predicted'])
r2_lstm = r2_score(df_final['Close'], df_final['Predicted'])
mape_lstm = mean_absolute_percentage_error(df_final['Close'], df_final['Predicted'])
print(f"LSTM Model:")
print(f"Mean Squared Error (MSE): {rmse_lstm**2}")
print(f"Mean Absolute Error (MAE): {mae_lstm}")
print(f"R² Score: {r2_lstm}")
print(f"Mean Absolute Percentage Error (MAPE): {mape_lstm}")
# Print training times
print(f"LSTM training time: {end_time_lstm - start_time_lstm} seconds")
Length of x_train: 32, Length of x_test: 8 Warning: Even after adjusting n_past, the data size may still be insufficient. Length of x_train: 32, Length of x_test: 8
Model: "sequential_60"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ lstm_205 (LSTM) │ (None, 60, 500) │ 1,004,000 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_129 (Dropout) │ (None, 60, 500) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ lstm_206 (LSTM) │ (None, 60, 400) │ 1,441,600 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_130 (Dropout) │ (None, 60, 400) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ lstm_207 (LSTM) │ (None, 60, 200) │ 480,800 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_131 (Dropout) │ (None, 60, 200) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ lstm_208 (LSTM) │ (None, 100) │ 120,400 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_74 (Dense) │ (None, 1) │ 101 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 3,046,901 (11.62 MB)
Trainable params: 3,046,901 (11.62 MB)
Non-trainable params: 0 (0.00 B)
Epoch 1/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 14s 1s/step - loss: 0.2531 - val_loss: 0.1042 Epoch 2/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 200ms/step - loss: 0.0647 - val_loss: 0.1913 Epoch 3/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 176ms/step - loss: 0.2940 - val_loss: 0.0508 Epoch 4/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 199ms/step - loss: 0.1541 - val_loss: 0.1038 Epoch 5/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 216ms/step - loss: 0.2038 - val_loss: 0.1148 Epoch 6/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 177ms/step - loss: 0.1956 - val_loss: 0.1073 Epoch 7/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 191ms/step - loss: 0.1722 - val_loss: 0.0852 Epoch 8/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 169ms/step - loss: 0.1217 - val_loss: 0.0438 Epoch 9/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 232ms/step - loss: 0.0486 - val_loss: 0.0087 Epoch 10/10 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 192ms/step - loss: 0.0547 - val_loss: 0.0956
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step
Root Mean Square Error (RMSE): 0.4045523600478051 Mean Absolute Error (MAE): 0.38662388080462373 LSTM Model: Mean Squared Error (MSE): 0.16366261202024893 Mean Absolute Error (MAE): 0.38662388080462373 R² Score: 0.025716611703877068 Mean Absolute Percentage Error (MAPE): 1.0251791693011336 LSTM training time: 17.98988914489746 seconds
1. Random Forest:
2. Gradient Boosting:
3. LSTM Model:
Accuracy Metrics:
Training Times:
LSTM: While it has lower MSE and MAE, suggesting better prediction accuracy, it requires more training time and has a lower R² score compared to Random Forest and Gradient Boosting. Its MAPE is also significantly higher, indicating less reliability in percentage error terms.
Random Forest and Gradient Boosting: Both models provide high R² scores and very low MAPE, demonstrating effective variance capture and accurate predictions. They also train much faster than LSTM.
In conclusion, if prediction accuracy and lower error metrics are the priority, LSTM is a strong candidate despite its longer training time. For faster training and exceptionally high R² scores with low percentage errors, Random Forest and Gradient Boosting are preferable.
Japanese candlestick charts are tools used in a particular trading style called price action to predict market movement through pattern recognition of continuations, breakouts and reversals.
Unlike a line chart, all of the price information can be viewed in one figure showing the high, low, open and close price of the day or chosen time frame. Price action traders observe patterns formed by green bullish candles where the stock is trending upwards over time, and red or black bearish candles where there is a downward trend.
def pandas_candlestick_ohlc(dat, stick = "day", otherseries = None):
"""
Japanese candlestick chart showing OHLC prices for a specified time period
:param dat: pandas dataframe object with datetime64 index, and float columns "Open", "High", "Low", and "Close"
:param stick: A string or number indicating the period of time covered by a single candlestick. Valid string inputs include "day", "week", "month", and "year", ("day" default), and any numeric input indicates the number of trading days included in a period
:param otherseries: An iterable that will be coerced into a list, containing the columns of dat that hold other series to be plotted as lines
:returns: a Japanese candlestick plot for stock data stored in dat, also plotting other series if passed.
"""
mondays = WeekdayLocator(MONDAY) # major ticks on the mondays
alldays = DayLocator() # minor ticks on the days
dayFormatter = DateFormatter('%d') # e.g., 12
# Create a new DataFrame which includes OHLC data for each period specified by stick input
transdat = dat.loc[:,["Open", "High", "Low", "Close"]]
if (type(stick) == str):
if stick == "day":
plotdat = transdat
stick = 1 # Used for plotting
elif stick in ["week", "month", "year"]:
if stick == "week":
transdat["week"] = pd.to_datetime(transdat.index).map(lambda x: x.isocalendar()[1]) # Identify weeks
elif stick == "month":
transdat["month"] = pd.to_datetime(transdat.index).map(lambda x: x.month) # Identify months
transdat["year"] = pd.to_datetime(transdat.index).map(lambda x: x.isocalendar()[0]) # Identify years
grouped = transdat.groupby(list(set(["year",stick]))) # Group by year and other appropriate variable
plotdat = pd.DataFrame({"Open": [], "High": [], "Low": [], "Close": []}) # Create empty data frame containing what will be plotted
for name, group in grouped:
plotdat = plotdat.append(pd.DataFrame({"Open": group.iloc[0,0],
"High": max(group.High),
"Low": min(group.Low),
"Close": group.iloc[-1,3]},
index = [group.index[0]]))
if stick == "week": stick = 5
elif stick == "month": stick = 30
elif stick == "year": stick = 365
elif (type(stick) == int and stick >= 1):
transdat["stick"] = [np.floor(i / stick) for i in range(len(transdat.index))]
grouped = transdat.groupby("stick")
plotdat = pd.DataFrame({"Open": [], "High": [], "Low": [], "Close": []}) # Create empty data frame containing what will be plotted
for name, group in grouped:
plotdat = plotdat.append(pd.DataFrame({"Open": group.iloc[0,0],
"High": max(group.High),
"Low": min(group.Low),
"Close": group.iloc[-1,3]},
index = [group.index[0]]))
else:
raise ValueError('Valid inputs to argument "stick" include the strings "day", "week", "month", "year", or a positive integer')
# Set plot parameters, including the axis object ax used for plotting
fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)
if plotdat.index[-1] - plotdat.index[0] < pd.Timedelta('730 days'):
weekFormatter = DateFormatter('%b %d') # e.g., Jan 12
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
else:
weekFormatter = DateFormatter('%b %d, %Y')
ax.xaxis.set_major_formatter(weekFormatter)
ax.grid(True)
# Create the candelstick chart
candlestick_ohlc(ax, list(zip(list(date2num(plotdat.index.tolist())), plotdat["Open"].tolist(), plotdat["High"].tolist(),
plotdat["Low"].tolist(), plotdat["Close"].tolist())),
colorup = "green", colordown = "red", width = stick * .4)
# Plot other series (such as moving averages) as lines
if otherseries != None:
if type(otherseries) != list:
otherseries = [otherseries]
dat.loc[:,otherseries].plot(ax = ax, lw = 1.3, grid = True)
ax.xaxis_date()
ax.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
sns.set(rc={'figure.figsize':(20, 10)})
plt.style.use('seaborn-whitegrid')
plt.title(f"Candlestick chart of {txt}", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Stock Price (p)', color = 'black', fontsize = 15);
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf
from datetime import datetime
# Ensure matplotlib inline plots in Jupyter Notebooks
%matplotlib inline
# Sample data - replace this with your actual data loading code
# Example data loading for demonstration purposes
start_date = '2023-01-01'
end_date = '2024-12-31'
ticker = 'NVDA'
data = yf.Ticker(ticker)
df = data.history(start=start_date, end=end_date)
# Reset index to have Date as a column
df.reset_index(inplace=True)
# Ensure Date is in datetime format
df['Date'] = pd.to_datetime(df['Date'])
# Set Date as index
df.set_index('Date', inplace=True)
# Prepare the data for mplfinance
mpf_data = df[['Open', 'High', 'Low', 'Close', 'Volume']]
# Plot candlestick chart
mpf.plot(mpf_data, type='candle', style='charles', title=f'{ticker} Stock Prices from {start_date} - {end_date}', ylabel='Price', volume=True)
A technical indicator is a series of data points that are derived by applying a formula to the price data of a security. Basically, they are price-derived indicators that use formulas to translate the momentum or price levels into quantifiable time series.
There are two categories of indicator: leading and lagging, and four types: trend, momentum, volatility and volume, which serve three broad functions: to alert, to confirm and to predict
5.1 Trend-following strategies Trend-following is about profiting from the prevailing trend through buying an asset when its price trend goes up, and selling when its trend goes down, expecting price movements to continue.
5.1.1 Moving averages Moving averages smooth a series filtering out noise to help identify trends, one of the fundamental principles of technical analysis being that prices move in trends. Types of moving averages include simple, exponential, smoothed, linear-weighted, MACD, and as lagging indicators they follow the price action and are commonly referred to as trend-following indicators.
5.1.2 Simple Moving Average (SMA) The simplest form of a moving average, known as a Simple Moving Average (SMA), is calculated by taking the arithmetic mean of a given set of values over a set time period. This model is probably the most naive approach to time series modelling and simply states that the next observation is the mean of all past observations and each value in the time period carries equal weight.
Modelling this an as average calculation problem we would try to predict the future stock market prices (for example, xt+1 ) as an average of the previously observed stock market prices within a fixed size window (for example, xt-n, ..., xt). This helps smooth out the price data by creating a constantly updated average price so that the impacts of random, short-term fluctuations on the price of a stock over a specified time-frame are mitigated.
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# Set ticker and title
ticker = 'NVDA'
title_txt = "20-day Simple Moving Average for NVDA stock"
label_txt = "NVDA Adj Close"
# Load/Read Data
yf.pdr_override()
# Define company tickers
tech_list = ['NVDA']
# Download stock data for the past 7 years
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)
# Initialize empty DataFrame
df = pd.DataFrame()
# Download and concatenate stock data
for stock in tech_list:
temp_df = yf.download(stock, start=start, end=end)
temp_df['Ticker'] = stock
df = pd.concat([df, temp_df])
# Reset index to make 'Date' a column
df.reset_index(inplace=True)
# Print the column names to verify
print("Columns in DataFrame:", df.columns)
# If 'Adj Close' exists, calculate the 20-day SMA
if 'Adj Close' in df.columns:
df['SMA_20'] = df['Adj Close'].rolling(window=20).mean()
# Plot the adjusted close and the SMA
plt.figure(figsize=(12, 6))
plt.plot(df['Date'], df['Adj Close'], label='Adj Close', color='blue')
plt.plot(df['Date'], df['SMA_20'], label='20-Day SMA', color='red')
plt.title(title_txt)
plt.xlabel('Date')
plt.ylabel(label_txt)
plt.legend()
plt.grid(True)
plt.show()
else:
print("Column 'Adj Close' not found in DataFrame.")
[*********************100%%**********************] 1 of 1 completed
Columns in DataFrame: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
'Ticker'],
dtype='object')
The SMA follows the time series removing noise from the signal and keeping the relevant information about the trend. If the stock price is above its moving average it is assumed that it will likely continue rising in an uptrend.
5.1.3 Moving Average Crossover Strategy The most popular moving average crossover strategy, and the "Hello World!" of quantitative trading, being the easiest to construct, is based on the simple moving average. When moving averages cross, it is usually confirmation of a change in the prevailing trend, and we want to test whether over the long term the lag caused by the moving average can still give us profitable trades.
Depending on the type of investor or trader (high risk vs. low risk, short-term vs. long-term trading), you can adjust your moving ‘time’ average (10 days, 20 days, 50 days, 200 days, 1 year, 5 years, etc). The longer the period of an SMA, the longer the time horizon of the trend it spots. The most commonly used SMA periods are 20 for short-term (swing) trading, 50 for medium-term (position) trading and 200 for long-term (portfolio) trading.
There is no single right answer and this will vary according to whether a trader is planning to buy when the trend is going down and sell when it's going up, potentially making short-term gains, or to hold for a more long-term investment.
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# Set ticker and title
ticker = 'NVDA'
title_txt = "20, 50, and 200-day Moving Averages for NVDA Stock"
label_txt = "NVDA Adj Close"
# Load/Read Data
yf.pdr_override()
# Define company tickers
tech_list = [ticker]
# Download stock data for the past 7 years
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)
# Initialize empty DataFrame
df = pd.DataFrame()
# Download and concatenate stock data
for stock in tech_list:
temp_df = yf.download(stock, start=start, end=end)
temp_df['Ticker'] = stock
df = pd.concat([df, temp_df])
# Reset index to make 'Date' a column
df.reset_index(inplace=True)
# Print the column names to verify
print("Columns in DataFrame:", df.columns)
def sma2():
plt.figure(figsize=(15,9))
# Calculate moving averages
df['SMA_20'] = df['Adj Close'].rolling(window=20).mean()
df['SMA_50'] = df['Adj Close'].rolling(window=50).mean()
df['SMA_200'] = df['Adj Close'].rolling(window=200).mean()
# Plot moving averages
plt.plot(df['Date'], df['SMA_20'], label='20 Day Avg', color='orange')
plt.plot(df['Date'], df['SMA_50'], label='50 Day Avg', color='green')
plt.plot(df['Date'], df['SMA_200'], label='200 Day Avg', color='blue')
plt.plot(df['Date'], df['Adj Close'], label=label_txt, color='black')
plt.title(title_txt, color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (USD)', color='black', fontsize=15)
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
sma2()
[*********************100%%**********************] 1 of 1 completed
Columns in DataFrame: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
'Ticker'],
dtype='object')
The chart shows that the 20-day moving average is the most sensitive to local changes, and the 200-day moving average the least. Here, the 200-day moving average indicates an overall bullish trend - the stock is trending upward over time. The 20- and 50-day moving averages are at times bearish and at other times bullish.
The major drawback of moving averages, however, is that because they are lagging, and smooth out prices, they tend to recognise reversals too late and are therefore not very helpful when used alone.
Trading Strategy The moving average crossover trading strategy will be to take two moving averages - 20-day (fast) and 200-day (slow) - and to go long (buy) when the fast MA goes above the slow MA and to go short (sell) when the fast MA goes below the slow MA.
# Create copy of dataframe for AstraZeneca data for 2010-2019
nvda_sma = temp_df.copy()
nvda_sma
| Open | High | Low | Close | Adj Close | Volume | Ticker | |
|---|---|---|---|---|---|---|---|
| Date | |||||||
| 2017-07-28 | 4.007250 | 4.134500 | 3.982000 | 4.109750 | 4.059559 | 513348000 | NVDA |
| 2017-07-31 | 4.123500 | 4.160000 | 4.015500 | 4.062750 | 4.013134 | 559672000 | NVDA |
| 2017-08-01 | 4.053250 | 4.114000 | 4.025000 | 4.112250 | 4.062028 | 431384000 | NVDA |
| 2017-08-02 | 4.143750 | 4.147750 | 4.031750 | 4.109750 | 4.059559 | 478444000 | NVDA |
| 2017-08-03 | 4.109250 | 4.165750 | 4.092000 | 4.162000 | 4.111171 | 442420000 | NVDA |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2024-07-22 | 120.349998 | 124.070000 | 119.860001 | 123.540001 | 123.540001 | 258068900 | NVDA |
| 2024-07-23 | 122.779999 | 124.690002 | 122.099998 | 122.589996 | 122.589996 | 173911000 | NVDA |
| 2024-07-24 | 119.169998 | 119.949997 | 113.440002 | 114.250000 | 114.250000 | 327776900 | NVDA |
| 2024-07-25 | 113.040001 | 116.629997 | 106.300003 | 112.279999 | 112.279999 | 460067000 | NVDA |
| 2024-07-26 | 116.190002 | 116.199997 | 111.580002 | 113.059998 | 113.059998 | 292831600 | NVDA |
1760 rows × 7 columns
# Calculate and add columns for moving averages of Adjusted Close price data
nvda_sma["20d"] = np.round(nvda_sma["Adj Close"].rolling(window = 20, center = False).mean(), 2)
nvda_sma["50d"] = np.round(nvda_sma["Adj Close"].rolling(window = 50, center = False).mean(), 2)
nvda_sma["200d"] = np.round(nvda_sma["Adj Close"].rolling(window = 200, center = False).mean(), 2)
nvda_sma.tail()
| Open | High | Low | Close | Adj Close | Volume | Ticker | 20d | 50d | 200d | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2024-07-22 | 120.349998 | 124.070000 | 119.860001 | 123.540001 | 123.540001 | 258068900 | NVDA | 125.28 | 116.81 | 77.54 |
| 2024-07-23 | 122.779999 | 124.690002 | 122.099998 | 122.589996 | 122.589996 | 173911000 | NVDA | 125.51 | 117.49 | 77.93 |
| 2024-07-24 | 119.169998 | 119.949997 | 113.440002 | 114.250000 | 114.250000 | 327776900 | NVDA | 124.92 | 117.98 | 78.28 |
| 2024-07-25 | 113.040001 | 116.629997 | 106.300003 | 112.279999 | 112.279999 | 460067000 | NVDA | 124.21 | 118.42 | 78.61 |
| 2024-07-26 | 116.190002 | 116.199997 | 111.580002 | 113.059998 | 113.059998 | 292831600 | NVDA | 123.66 | 118.85 | 78.95 |
txt = "20, 50 and 200 day moving averages for NVDA stock"
# Slice rows to plot data from 2018-2024
pandas_candlestick_ohlc(nvda_sma.loc['2018-01-01':'2024-12-31',:], otherseries = ["20d", "50d", "200d"])
Backtesting Before using the strategy we will evaluate the quality of it first by backtesting, or looking at how profitable it is on historical data.
# Identify when the 20-day average is below the 200-day average, and vice versa.
nvda_sma['20d-200d'] = nvda_sma['20d'] - nvda_sma['200d']
nvda_sma.tail()
| Open | High | Low | Close | Adj Close | Volume | Ticker | 20d | 50d | 200d | 20d-200d | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||
| 2024-07-22 | 120.349998 | 124.070000 | 119.860001 | 123.540001 | 123.540001 | 258068900 | NVDA | 125.28 | 116.81 | 77.54 | 47.74 |
| 2024-07-23 | 122.779999 | 124.690002 | 122.099998 | 122.589996 | 122.589996 | 173911000 | NVDA | 125.51 | 117.49 | 77.93 | 47.58 |
| 2024-07-24 | 119.169998 | 119.949997 | 113.440002 | 114.250000 | 114.250000 | 327776900 | NVDA | 124.92 | 117.98 | 78.28 | 46.64 |
| 2024-07-25 | 113.040001 | 116.629997 | 106.300003 | 112.279999 | 112.279999 | 460067000 | NVDA | 124.21 | 118.42 | 78.61 | 45.60 |
| 2024-07-26 | 116.190002 | 116.199997 | 111.580002 | 113.059998 | 113.059998 | 292831600 | NVDA | 123.66 | 118.85 | 78.95 | 44.71 |
# The sign of this difference is the regime; that is, if the fast moving average is above the slow moving average,
# this is a bullish regime, and a bearish regime holds when the fast moving average is below the slow moving average
# np.where() is a vectorized if-else function, where a condition is checked for each component of a vector, and the first argument passed is used when the condition holds, and the other passed if it does not
nvda_sma["Regime"] = np.where(nvda_sma['20d-200d'] > 0, 1, 0)
# We have 1's for bullish regimes and 0's for everything else. Replace bearish regime's values with -1, and to maintain the rest of the vector, the second argument is nvda_sma["Regime"]
nvda_sma["Regime"] = np.where(nvda_sma['20d-200d'] < 0, -1, nvda_sma["Regime"])
nvda_sma.loc['2018-01-01':'2024-12-31',"Regime"].plot(ylim = (-2,2)).axhline(y = 0, color = "black", lw = 2);
plt.title("Regime for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Regime', color = 'black', fontsize = 15);
nvda_sma["Regime"].plot(ylim = (-2,2)).axhline(y = 0, color = "black", lw = 2);
plt.title("Regime for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Regime', color = 'black', fontsize = 15);
# Number of bullish and bearish days
nvda_sma["Regime"].value_counts()
Regime 1 1172 -1 388 0 200 Name: count, dtype: int64
For 1172 days the market was bullish, for 388 days it was bearish, and neutral for 200 days for the time period 2018-2024.
nvda_sma
| Open | High | Low | Close | Adj Close | Volume | Ticker | 20d | 50d | 200d | 20d-200d | Regime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||
| 2017-07-28 | 4.007250 | 4.134500 | 3.982000 | 4.109750 | 4.059559 | 513348000 | NVDA | NaN | NaN | NaN | NaN | 0 |
| 2017-07-31 | 4.123500 | 4.160000 | 4.015500 | 4.062750 | 4.013134 | 559672000 | NVDA | NaN | NaN | NaN | NaN | 0 |
| 2017-08-01 | 4.053250 | 4.114000 | 4.025000 | 4.112250 | 4.062028 | 431384000 | NVDA | NaN | NaN | NaN | NaN | 0 |
| 2017-08-02 | 4.143750 | 4.147750 | 4.031750 | 4.109750 | 4.059559 | 478444000 | NVDA | NaN | NaN | NaN | NaN | 0 |
| 2017-08-03 | 4.109250 | 4.165750 | 4.092000 | 4.162000 | 4.111171 | 442420000 | NVDA | NaN | NaN | NaN | NaN | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2024-07-22 | 120.349998 | 124.070000 | 119.860001 | 123.540001 | 123.540001 | 258068900 | NVDA | 125.28 | 116.81 | 77.54 | 47.74 | 1 |
| 2024-07-23 | 122.779999 | 124.690002 | 122.099998 | 122.589996 | 122.589996 | 173911000 | NVDA | 125.51 | 117.49 | 77.93 | 47.58 | 1 |
| 2024-07-24 | 119.169998 | 119.949997 | 113.440002 | 114.250000 | 114.250000 | 327776900 | NVDA | 124.92 | 117.98 | 78.28 | 46.64 | 1 |
| 2024-07-25 | 113.040001 | 116.629997 | 106.300003 | 112.279999 | 112.279999 | 460067000 | NVDA | 124.21 | 118.42 | 78.61 | 45.60 | 1 |
| 2024-07-26 | 116.190002 | 116.199997 | 111.580002 | 113.059998 | 113.059998 | 292831600 | NVDA | 123.66 | 118.85 | 78.95 | 44.71 | 1 |
1760 rows × 12 columns
# Obtain signals with -1 indicating “sell”, 1 indicating “buy”, and 0 no action
# To ensure that all trades close out, temporarily change the regime of the last row to 0
regime_orig = nvda_sma.iloc[-1, 10]
nvda_sma.iloc[-1, 10] = 0
nvda_sma["Signal"] = np.sign(nvda_sma["Regime"] - nvda_sma["Regime"].shift(1))
# Restore original regime data
nvda_sma.iloc[-1, 10] = regime_orig
nvda_sma.tail()
| Open | High | Low | Close | Adj Close | Volume | Ticker | 20d | 50d | 200d | 20d-200d | Regime | Signal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||
| 2024-07-22 | 120.349998 | 124.070000 | 119.860001 | 123.540001 | 123.540001 | 258068900 | NVDA | 125.28 | 116.81 | 77.54 | 47.74 | 1 | 0.0 |
| 2024-07-23 | 122.779999 | 124.690002 | 122.099998 | 122.589996 | 122.589996 | 173911000 | NVDA | 125.51 | 117.49 | 77.93 | 47.58 | 1 | 0.0 |
| 2024-07-24 | 119.169998 | 119.949997 | 113.440002 | 114.250000 | 114.250000 | 327776900 | NVDA | 124.92 | 117.98 | 78.28 | 46.64 | 1 | 0.0 |
| 2024-07-25 | 113.040001 | 116.629997 | 106.300003 | 112.279999 | 112.279999 | 460067000 | NVDA | 124.21 | 118.42 | 78.61 | 45.60 | 1 | 0.0 |
| 2024-07-26 | 116.190002 | 116.199997 | 111.580002 | 113.059998 | 113.059998 | 292831600 | NVDA | 123.66 | 118.85 | 78.95 | 44.71 | 1 | 0.0 |
nvda_sma["Signal"].plot(ylim = (-2, 2));
plt.title("Trading signals for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Trading signal', color = 'black', fontsize = 15);
# Unique counts of trading signals
nvda_sma["Signal"].value_counts()
Signal 0.0 1749 1.0 6 -1.0 4 Name: count, dtype: int64
We would buy NVDA stock 6 times and sell 4 times. If we only go long 6 trades will be engaged in over the 6-year period, while if we pivot from a long to a short position every time a long position is terminated, we would engage in 6 trades total. It is worth bearing in mind that trading more frequently isn’t necessarily good as trades are never free.
# Identify what the price of the stock is at every buy.
nvda_sma.loc[nvda_sma["Signal"] == 1, "Close"]
Date 2018-05-14 6.38400 2019-07-26 4.37675 2019-07-29 4.37050 2019-08-30 4.18775 2022-03-24 28.15000 2023-01-26 19.80200 Name: Close, dtype: float64
# Identify what the price of the stock is at every sell.
nvda_sma.loc[nvda_sma["Signal"] == -1, "Close"]
Date 2018-10-26 4.957250 2019-08-27 4.045000 2022-03-14 21.330000 2022-04-22 19.514999 Name: Close, dtype: float64
# Create a dataframe with trades, including the price at the trade and the regime under which the trade is made.
nvda_signals = pd.concat([
pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == 1, "Adj Close"],
"Regime": nvda_sma.loc[nvda_sma["Signal"] == 1, "Regime"],
"Signal": "Buy"}),
pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == -1, "Adj Close"],
"Regime": nvda_sma.loc[nvda_sma["Signal"] == -1, "Regime"],
"Signal": "Sell"}),
])
nvda_signals.sort_index(inplace = True)
nvda_signals
| Price | Regime | Signal | |
|---|---|---|---|
| Date | |||
| 2018-05-14 | 6.319897 | 1 | Buy |
| 2018-10-26 | 4.913193 | -1 | Sell |
| 2019-07-26 | 4.351629 | 0 | Buy |
| 2019-07-29 | 4.345416 | 1 | Buy |
| 2019-08-27 | 4.021783 | -1 | Sell |
| 2019-08-30 | 4.167836 | 1 | Buy |
| 2022-03-14 | 21.301502 | -1 | Sell |
| 2022-03-24 | 28.112389 | 1 | Buy |
| 2022-04-22 | 19.488928 | -1 | Sell |
| 2023-01-26 | 19.790665 | 1 | Buy |
# Ensure previous_buy_signals aligns with buy_signals
buy_signals = nvda_signals[nvda_signals['Signal'] == 'Buy']
previous_buy_signals = buy_signals.shift(1)
# Create DataFrame for long trade profits
nvda_long_profits = pd.DataFrame({
"Entry Price": buy_signals["Price"],
"Previous Buy Price": previous_buy_signals["Price"].values,
"Profit": buy_signals["Price"].values - previous_buy_signals["Price"].values,
"End Date": buy_signals.index
}).dropna() # Drop rows with NaN values in 'Profit'
# Print the nvda_long_profits DataFrame
print("Columns in nvda_long_profits:", nvda_long_profits.columns)
print(nvda_long_profits)
Columns in nvda_long_profits: Index(['Entry Price', 'Previous Buy Price', 'Profit', 'End Date'], dtype='object')
Entry Price Previous Buy Price Profit End Date
23 4.095017 4.082905 0.012112 23
24 4.187952 4.095017 0.092935 24
25 4.213162 4.187952 0.025210 25
26 4.100701 4.213162 -0.112461 26
27 4.098230 4.100701 -0.002471 27
... ... ... ... ...
1745 128.199997 125.830002 2.369995 1745
1746 131.380005 128.199997 3.180008 1746
1747 134.910004 131.380005 3.529999 1747
1749 129.240005 134.910004 -5.669998 1749
1750 128.440002 129.240005 -0.800003 1750
[1129 rows x 4 columns]
5.1.4 Exponential Moving Average In a Simple Moving Average, each value in the time period carries equal weight, and values outside of the time period are not included in the average. However, the Exponential Moving Average is a cumulative calculation where a different decreasing weight is assigned to each observation. Past values have a diminishing contribution to the average, while more recent values have a greater contribution. This method allows the moving average to be more responsive to changes in the data.
# Set ticker and title
ticker = 'NVDA'
title_txt = "20-day Exponential Moving Average for NVDA stock"
label_txt = "NVDA Adj Close"
# Download NVDA stock data for the year 2024
end = datetime(2024, 12, 31)
start = datetime(2024, 1, 1)
# Download stock data for the specified ticker
df = yf.download(ticker, start=start, end=end)
# Define ewma function
def ewma():
plt.figure(figsize=(15, 9))
# Calculate and plot 20-day EMA
df['20_Day_EMA'] = df['Adj Close'].ewm(span=20, adjust=False).mean()
df['Adj Close'].plot(label=label_txt, color='blue')
df['20_Day_EMA'].plot(label='20 Day EMA', color='red')
plt.title(title_txt, color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend()
plt.grid(True)
plt.show()
# Call the function to plot
ewma()
[*********************100%%**********************] 1 of 1 completed
# Set ticker and title
ticker = 'NVDA'
title_txt = "20-, 50-, and 200-day Exponential Moving Averages for NVDA stock"
label_txt = "NVDA Adj Close"
# Download NVDA stock data for the period 2016-2019
start = datetime(2018, 1, 1)
end = datetime(2024, 12, 31)
# Download stock data for the specified ticker
df = yf.download(ticker, start=start, end=end)
# Define ewma2 function
def ewma2():
plt.figure(figsize=(15, 9))
# Calculate and plot 20-day EMA
df['20_Day_EMA'] = df['Adj Close'].ewm(span=20, adjust=False).mean()
df['50_Day_EMA'] = df['Adj Close'].ewm(span=50, adjust=False).mean()
df['200_Day_EMA'] = df['Adj Close'].ewm(span=200, adjust=False).mean()
# Plot adjusted close and EMAs
df['Adj Close'].plot(label=label_txt, color='blue')
df['20_Day_EMA'].plot(label='20 Day EMA', color='red')
df['50_Day_EMA'].plot(label='50 Day EMA', color='green')
df['200_Day_EMA'].plot(label='200 Day EMA', color='orange')
plt.title(title_txt, color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend()
plt.grid(True)
plt.show()
# Call the function to plot
ewma2()
[*********************100%%**********************] 1 of 1 completed
5.1.5 Triple Moving Average Crossover Strategy This strategy uses three moving moving averages - short/fast, middle/medium and long/slow - and has two buy and sell signals.
The first is to buy when the middle/medium moving average crosses above the long/slow moving average and the short/fast moving average crosses above the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses below the middle/medium moving average.
The second is to buy when the middle/medium moving average crosses below the long/slow moving average and the short/fast moving average crosses below the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses above the middle/medium moving average.
nvda_sma[['Adj Close']]['2024-05-01':'2024-10-31']
| Adj Close | |
|---|---|
| Date | |
| 2024-05-01 | 83.034180 |
| 2024-05-02 | 85.809952 |
| 2024-05-03 | 88.781708 |
| 2024-05-06 | 92.132431 |
| 2024-05-07 | 90.546562 |
| 2024-05-08 | 90.404579 |
| 2024-05-09 | 88.739716 |
| 2024-05-10 | 89.870613 |
| 2024-05-13 | 90.391579 |
| 2024-05-14 | 91.348495 |
| 2024-05-15 | 94.622223 |
| 2024-05-16 | 94.351250 |
| 2024-05-17 | 92.471397 |
| 2024-05-20 | 94.772217 |
| 2024-05-21 | 95.378166 |
| 2024-05-22 | 94.942200 |
| 2024-05-23 | 103.790482 |
| 2024-05-24 | 106.460258 |
| 2024-05-28 | 113.891647 |
| 2024-05-29 | 114.815567 |
| 2024-05-30 | 110.490921 |
| 2024-05-31 | 109.624001 |
| 2024-06-03 | 114.990555 |
| 2024-06-04 | 116.427429 |
| 2024-06-05 | 122.429947 |
| 2024-06-06 | 120.988060 |
| 2024-06-07 | 120.878075 |
| 2024-06-10 | 121.779999 |
| 2024-06-11 | 120.910004 |
| 2024-06-12 | 125.199997 |
| 2024-06-13 | 129.610001 |
| 2024-06-14 | 131.880005 |
| 2024-06-17 | 130.979996 |
| 2024-06-18 | 135.580002 |
| 2024-06-20 | 130.779999 |
| 2024-06-21 | 126.570000 |
| 2024-06-24 | 118.110001 |
| 2024-06-25 | 126.089996 |
| 2024-06-26 | 126.400002 |
| 2024-06-27 | 123.989998 |
| 2024-06-28 | 123.540001 |
| 2024-07-01 | 124.300003 |
| 2024-07-02 | 122.669998 |
| 2024-07-03 | 128.279999 |
| 2024-07-05 | 125.830002 |
| 2024-07-08 | 128.199997 |
| 2024-07-09 | 131.380005 |
| 2024-07-10 | 134.910004 |
| 2024-07-11 | 127.400002 |
| 2024-07-12 | 129.240005 |
| 2024-07-15 | 128.440002 |
| 2024-07-16 | 126.360001 |
| 2024-07-17 | 117.989998 |
| 2024-07-18 | 121.089996 |
| 2024-07-19 | 117.930000 |
| 2024-07-22 | 123.540001 |
| 2024-07-23 | 122.589996 |
| 2024-07-24 | 114.250000 |
| 2024-07-25 | 112.279999 |
| 2024-07-26 | 113.059998 |
# Identify what the price of the stock is at every sell.
nvda_sma.loc[nvda_sma["Signal"] == -1, "Close"]
Date 2018-10-26 4.957250 2019-08-27 4.045000 2022-03-14 21.330000 2022-04-22 19.514999 Name: Close, dtype: float64
# Create a dataframe with trades, including the price at the trade and the regime under which the trade is made.
nvda_signals = pd.concat([
pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == 1, "Adj Close"],
"Regime": nvda_sma.loc[nvda_sma["Signal"] == 1, "Regime"],
"Signal": "Buy"}),
pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == -1, "Adj Close"],
"Regime": nvda_sma.loc[nvda_sma["Signal"] == -1, "Regime"],
"Signal": "Sell"}),
])
nvda_signals.sort_index(inplace = True)
nvda_signals
| Price | Regime | Signal | |
|---|---|---|---|
| Date | |||
| 2018-05-14 | 6.319897 | 1 | Buy |
| 2018-10-26 | 4.913193 | -1 | Sell |
| 2019-07-26 | 4.351629 | 0 | Buy |
| 2019-07-29 | 4.345416 | 1 | Buy |
| 2019-08-27 | 4.021783 | -1 | Sell |
| 2019-08-30 | 4.167836 | 1 | Buy |
| 2022-03-14 | 21.301502 | -1 | Sell |
| 2022-03-24 | 28.112389 | 1 | Buy |
| 2022-04-22 | 19.488928 | -1 | Sell |
| 2023-01-26 | 19.790665 | 1 | Buy |
# Let's see the profitability of long trades
# Create DataFrame for long trade profits
nvda_long_profits = pd.DataFrame({
"Price": buy_signals["Price"],
"Previous Buy Price": previous_buy_signals["Price"].reindex(buy_signals.index).values,
"Profit": buy_signals["Price"].values - previous_buy_signals["Price"].reindex(buy_signals.index).values,
"End Date": buy_signals.index
}).dropna() # Drop rows with NaN values in 'Profit'
# Print the nvda_long_profits DataFrame
print(nvda_long_profits)
Price Previous Buy Price Profit End Date 23 4.095017 4.082905 0.012112 23 24 4.187952 4.095017 0.092935 24 25 4.213162 4.187952 0.025210 25 26 4.100701 4.213162 -0.112461 26 27 4.098230 4.100701 -0.002471 27 ... ... ... ... ... 1745 128.199997 125.830002 2.369995 1745 1746 131.380005 128.199997 3.180008 1746 1747 134.910004 131.380005 3.529999 1747 1749 129.240005 134.910004 -5.669998 1749 1750 128.440002 129.240005 -0.800003 1750 [1129 rows x 4 columns]
5.1.5 Triple Moving Average Crossover Strategy This strategy uses three moving moving averages - short/fast, middle/medium and long/slow - and has two buy and sell signals.
The first is to buy when the middle/medium moving average crosses above the long/slow moving average and the short/fast moving average crosses above the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses below the middle/medium moving average.
The second is to buy when the middle/medium moving average crosses below the long/slow moving average and the short/fast moving average crosses below the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses above the middle/medium moving average.
# Define the function for plotting EMAs
def ewma3():
sns.set(rc={'figure.figsize':(15, 9)})
# Extract data for the 6-month period
nvda_adj_6mo1 = nvda_sma[['Adj Close']]['2024-05-01':'2024-10-31']
# Calculate EMAs
ShortEMA = nvda_adj_6mo1['Adj Close'].ewm(span=5, adjust=False).mean()
MiddleEMA = nvda_adj_6mo1['Adj Close'].ewm(span=21, adjust=False).mean()
LongEMA = nvda_adj_6mo1['Adj Close'].ewm(span=63, adjust=False).mean()
# Add EMAs to the DataFrame
nvda_adj_6mo1['Short'] = ShortEMA
nvda_adj_6mo1['Middle'] = MiddleEMA
nvda_adj_6mo1['Long'] = LongEMA
# Plotting
plt.plot(nvda_adj_6mo1['Adj Close'], label=f"{label_txt}", color='blue')
plt.plot(ShortEMA, label='Short/Fast EMA', color='red')
plt.plot(MiddleEMA, label='Middle/Medium EMA', color='orange')
plt.plot(LongEMA, label='Long/Slow EMA', color='green')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend()
plt.show()
return nvda_adj_6mo1
# Set ticker and title
ticker = 'NVDA'
title_txt = "Triple Exponential Moving Average Crossover for NVDA stock"
label_txt = "NVDA Adj Close"
# Call the function to plot and get the DataFrame
nvda_adj_6mo1 = ewma3()
# Now nvda_adj_6mo1 contains the EMAs
print(nvda_adj_6mo1)
Adj Close Short Middle Long Date 2024-05-01 83.034180 83.034180 83.034180 83.034180 2024-05-02 85.809952 83.959437 83.286523 83.120923 2024-05-03 88.781708 85.566861 83.786085 83.297822 2024-05-06 92.132431 87.755384 84.544844 83.573904 2024-05-07 90.546562 88.685777 85.090454 83.791799 2024-05-08 90.404579 89.258711 85.573557 83.998449 2024-05-09 88.739716 89.085712 85.861389 84.146613 2024-05-10 89.870613 89.347346 86.225864 84.325488 2024-05-13 90.391579 89.695424 86.604565 84.515054 2024-05-14 91.348495 90.246448 87.035832 84.728599 2024-05-15 94.622223 91.705039 87.725504 85.037774 2024-05-16 94.351250 92.587109 88.327844 85.328820 2024-05-17 92.471397 92.548539 88.704531 85.552026 2024-05-20 94.772217 93.289765 89.256139 85.840157 2024-05-21 95.378166 93.985899 89.812687 86.138220 2024-05-22 94.942200 94.304666 90.279006 86.413344 2024-05-23 103.790482 97.466604 91.507322 86.956380 2024-05-24 106.460258 100.464489 92.866680 87.565876 2024-05-28 113.891647 104.940208 94.778041 88.388556 2024-05-29 114.815567 108.231995 96.599634 89.214400 2024-05-30 110.490921 108.984970 97.862478 89.879292 2024-05-31 109.624001 109.197980 98.931707 90.496314 2024-06-03 114.990555 111.128838 100.391603 91.261759 2024-06-04 116.427429 112.895035 101.849405 92.048186 2024-06-05 122.429947 116.073339 103.720363 92.997616 2024-06-06 120.988060 117.711579 105.290154 93.872317 2024-06-07 120.878075 118.767078 106.707238 94.716247 2024-06-10 121.779999 119.771385 108.077489 95.561990 2024-06-11 120.910004 120.150924 109.244081 96.354115 2024-06-12 125.199997 121.833949 110.694619 97.255549 2024-06-13 129.610001 124.425966 112.414199 98.266625 2024-06-14 131.880005 126.910646 114.183818 99.317043 2024-06-17 130.979996 128.267096 115.710743 100.306511 2024-06-18 135.580002 130.704731 117.517039 101.408807 2024-06-20 130.779999 130.729820 118.722763 102.326657 2024-06-21 126.570000 129.343213 119.436148 103.084262 2024-06-24 118.110001 125.598809 119.315589 103.553816 2024-06-25 126.089996 125.762538 119.931444 104.258072 2024-06-26 126.400002 125.975026 120.519495 104.950007 2024-06-27 123.989998 125.313350 120.834995 105.545007 2024-06-28 123.540001 124.722234 121.080905 106.107350 2024-07-01 124.300003 124.581490 121.373550 106.675871 2024-07-02 122.669998 123.944326 121.491409 107.175687 2024-07-03 128.279999 125.389550 122.108554 107.835197 2024-07-05 125.830002 125.536368 122.446867 108.397534 2024-07-08 128.199997 126.424244 122.969879 109.016361 2024-07-09 131.380005 128.076164 123.734436 109.715225 2024-07-10 134.910004 130.354111 124.750396 110.502562 2024-07-11 127.400002 129.369408 124.991270 111.030607 2024-07-12 129.240005 129.326274 125.377518 111.599651 2024-07-15 128.440002 129.030850 125.655926 112.125912 2024-07-16 126.360001 128.140567 125.719933 112.570727 2024-07-17 117.989998 124.757044 125.017211 112.740079 2024-07-18 121.089996 123.534695 124.660192 113.001014 2024-07-19 117.930000 121.666463 124.048356 113.155045 2024-07-22 123.540001 122.290976 124.002142 113.479575 2024-07-23 122.589996 122.390649 123.873765 113.764276 2024-07-24 114.250000 119.677100 122.998878 113.779454 2024-07-25 112.279999 117.211399 122.024434 113.732596 2024-07-26 113.059998 115.827599 121.209485 113.711578
# Define the function for buy/sell signals based on EMAs
def buy_sell_ewma3(data):
buy_list = []
sell_list = []
flag_long = False
flag_short = False
for i in range(0, len(data)):
if data['Middle'][i] < data['Long'][i] and data['Short'][i] < data['Middle'][i] and flag_long == False and flag_short == False:
buy_list.append(data['Adj Close'][i])
sell_list.append(np.nan)
flag_short = True
elif flag_short == True and data['Short'][i] > data['Middle'][i]:
sell_list.append(data['Adj Close'][i])
buy_list.append(np.nan)
flag_short = False
elif data['Middle'][i] > data['Long'][i] and data['Short'][i] > data['Middle'][i] and flag_long == False and flag_short == False:
buy_list.append(data['Adj Close'][i])
sell_list.append(np.nan)
flag_long = True
elif flag_long == True and data['Short'][i] < data['Middle'][i]:
sell_list.append(data['Adj Close'][i])
buy_list.append(np.nan)
flag_long = False
else:
buy_list.append(np.nan)
sell_list.append(np.nan)
return buy_list, sell_list
# Calculate the EMAs and add them to the DataFrame
nvda_adj_6mo1 = ewma3()
nvda_adj_6mo1['Short'] = nvda_adj_6mo1['Adj Close'].ewm(span=5, adjust=False).mean()
nvda_adj_6mo1['Middle'] = nvda_adj_6mo1['Adj Close'].ewm(span=21, adjust=False).mean()
nvda_adj_6mo1['Long'] = nvda_adj_6mo1['Adj Close'].ewm(span=63, adjust=False).mean()
# Generate buy/sell signals
buy_signals, sell_signals = buy_sell_ewma3(nvda_adj_6mo1)
# Add buy/sell signals to the DataFrame
nvda_adj_6mo1['Buy_Signal'] = buy_signals
nvda_adj_6mo1['Sell_Signal'] = sell_signals
# Display the DataFrame with signals
print(nvda_adj_6mo1)
Adj Close Short Middle Long Buy_Signal
Date
2024-05-01 83.034180 83.034180 83.034180 83.034180 NaN \
2024-05-02 85.809952 83.959437 83.286523 83.120923 85.809952
2024-05-03 88.781708 85.566861 83.786085 83.297822 NaN
2024-05-06 92.132431 87.755384 84.544844 83.573904 NaN
2024-05-07 90.546562 88.685777 85.090454 83.791799 NaN
2024-05-08 90.404579 89.258711 85.573557 83.998449 NaN
2024-05-09 88.739716 89.085712 85.861389 84.146613 NaN
2024-05-10 89.870613 89.347346 86.225864 84.325488 NaN
2024-05-13 90.391579 89.695424 86.604565 84.515054 NaN
2024-05-14 91.348495 90.246448 87.035832 84.728599 NaN
2024-05-15 94.622223 91.705039 87.725504 85.037774 NaN
2024-05-16 94.351250 92.587109 88.327844 85.328820 NaN
2024-05-17 92.471397 92.548539 88.704531 85.552026 NaN
2024-05-20 94.772217 93.289765 89.256139 85.840157 NaN
2024-05-21 95.378166 93.985899 89.812687 86.138220 NaN
2024-05-22 94.942200 94.304666 90.279006 86.413344 NaN
2024-05-23 103.790482 97.466604 91.507322 86.956380 NaN
2024-05-24 106.460258 100.464489 92.866680 87.565876 NaN
2024-05-28 113.891647 104.940208 94.778041 88.388556 NaN
2024-05-29 114.815567 108.231995 96.599634 89.214400 NaN
2024-05-30 110.490921 108.984970 97.862478 89.879292 NaN
2024-05-31 109.624001 109.197980 98.931707 90.496314 NaN
2024-06-03 114.990555 111.128838 100.391603 91.261759 NaN
2024-06-04 116.427429 112.895035 101.849405 92.048186 NaN
2024-06-05 122.429947 116.073339 103.720363 92.997616 NaN
2024-06-06 120.988060 117.711579 105.290154 93.872317 NaN
2024-06-07 120.878075 118.767078 106.707238 94.716247 NaN
2024-06-10 121.779999 119.771385 108.077489 95.561990 NaN
2024-06-11 120.910004 120.150924 109.244081 96.354115 NaN
2024-06-12 125.199997 121.833949 110.694619 97.255549 NaN
2024-06-13 129.610001 124.425966 112.414199 98.266625 NaN
2024-06-14 131.880005 126.910646 114.183818 99.317043 NaN
2024-06-17 130.979996 128.267096 115.710743 100.306511 NaN
2024-06-18 135.580002 130.704731 117.517039 101.408807 NaN
2024-06-20 130.779999 130.729820 118.722763 102.326657 NaN
2024-06-21 126.570000 129.343213 119.436148 103.084262 NaN
2024-06-24 118.110001 125.598809 119.315589 103.553816 NaN
2024-06-25 126.089996 125.762538 119.931444 104.258072 NaN
2024-06-26 126.400002 125.975026 120.519495 104.950007 NaN
2024-06-27 123.989998 125.313350 120.834995 105.545007 NaN
2024-06-28 123.540001 124.722234 121.080905 106.107350 NaN
2024-07-01 124.300003 124.581490 121.373550 106.675871 NaN
2024-07-02 122.669998 123.944326 121.491409 107.175687 NaN
2024-07-03 128.279999 125.389550 122.108554 107.835197 NaN
2024-07-05 125.830002 125.536368 122.446867 108.397534 NaN
2024-07-08 128.199997 126.424244 122.969879 109.016361 NaN
2024-07-09 131.380005 128.076164 123.734436 109.715225 NaN
2024-07-10 134.910004 130.354111 124.750396 110.502562 NaN
2024-07-11 127.400002 129.369408 124.991270 111.030607 NaN
2024-07-12 129.240005 129.326274 125.377518 111.599651 NaN
2024-07-15 128.440002 129.030850 125.655926 112.125912 NaN
2024-07-16 126.360001 128.140567 125.719933 112.570727 NaN
2024-07-17 117.989998 124.757044 125.017211 112.740079 NaN
2024-07-18 121.089996 123.534695 124.660192 113.001014 NaN
2024-07-19 117.930000 121.666463 124.048356 113.155045 NaN
2024-07-22 123.540001 122.290976 124.002142 113.479575 NaN
2024-07-23 122.589996 122.390649 123.873765 113.764276 NaN
2024-07-24 114.250000 119.677100 122.998878 113.779454 NaN
2024-07-25 112.279999 117.211399 122.024434 113.732596 NaN
2024-07-26 113.059998 115.827599 121.209485 113.711578 NaN
Sell_Signal
Date
2024-05-01 NaN
2024-05-02 NaN
2024-05-03 NaN
2024-05-06 NaN
2024-05-07 NaN
2024-05-08 NaN
2024-05-09 NaN
2024-05-10 NaN
2024-05-13 NaN
2024-05-14 NaN
2024-05-15 NaN
2024-05-16 NaN
2024-05-17 NaN
2024-05-20 NaN
2024-05-21 NaN
2024-05-22 NaN
2024-05-23 NaN
2024-05-24 NaN
2024-05-28 NaN
2024-05-29 NaN
2024-05-30 NaN
2024-05-31 NaN
2024-06-03 NaN
2024-06-04 NaN
2024-06-05 NaN
2024-06-06 NaN
2024-06-07 NaN
2024-06-10 NaN
2024-06-11 NaN
2024-06-12 NaN
2024-06-13 NaN
2024-06-14 NaN
2024-06-17 NaN
2024-06-18 NaN
2024-06-20 NaN
2024-06-21 NaN
2024-06-24 NaN
2024-06-25 NaN
2024-06-26 NaN
2024-06-27 NaN
2024-06-28 NaN
2024-07-01 NaN
2024-07-02 NaN
2024-07-03 NaN
2024-07-05 NaN
2024-07-08 NaN
2024-07-09 NaN
2024-07-10 NaN
2024-07-11 NaN
2024-07-12 NaN
2024-07-15 NaN
2024-07-16 NaN
2024-07-17 117.989998
2024-07-18 NaN
2024-07-19 NaN
2024-07-22 NaN
2024-07-23 NaN
2024-07-24 NaN
2024-07-25 NaN
2024-07-26 NaN
# Define the function for buy/sell signals based on EMAs
def buy_sell_ewma3(data):
buy_list = []
sell_list = []
flag_long = False
flag_short = False
for i in range(0, len(data)):
if data['Middle'][i] < data['Long'][i] and data['Short'][i] < data['Middle'][i] and flag_long == False and flag_short == False:
buy_list.append(data['Adj Close'][i])
sell_list.append(np.nan)
flag_short = True
elif flag_short == True and data['Short'][i] > data['Middle'][i]:
sell_list.append(data['Adj Close'][i])
buy_list.append(np.nan)
flag_short = False
elif data['Middle'][i] > data['Long'][i] and data['Short'][i] > data['Middle'][i] and flag_long == False and flag_short == False:
buy_list.append(data['Adj Close'][i])
sell_list.append(np.nan)
flag_long = True
elif flag_long == True and data['Short'][i] < data['Middle'][i]:
sell_list.append(data['Adj Close'][i])
buy_list.append(np.nan)
flag_long = False
else:
buy_list.append(np.nan)
sell_list.append(np.nan)
return buy_list, sell_list
# Extract data for the 6-month period
nvda_adj_6mo = nvda_sma[['Adj Close']]['2024-05-01':'2024-10-31']
# Calculate EMAs
nvda_adj_6mo['Short'] = nvda_adj_6mo['Adj Close'].ewm(span=5, adjust=False).mean()
nvda_adj_6mo['Middle'] = nvda_adj_6mo['Adj Close'].ewm(span=21, adjust=False).mean()
nvda_adj_6mo['Long'] = nvda_adj_6mo['Adj Close'].ewm(span=63, adjust=False).mean()
# Generate buy/sell signals
nvda_adj_6mo['Buy'] = buy_sell_ewma3(nvda_adj_6mo)[0]
nvda_adj_6mo['Sell'] = buy_sell_ewma3(nvda_adj_6mo)[1]
# Define the function for plotting buy/sell signals and EMAs
def buy_sell_ewma3_plot():
sns.set(rc={'figure.figsize':(18, 10)})
plt.plot(nvda_adj_6mo['Adj Close'], label=f"{label_txt}", color='blue', alpha=0.35)
plt.plot(nvda_adj_6mo['Short'], label='Short/Fast EMA', color='red', alpha=0.35)
plt.plot(nvda_adj_6mo['Middle'], label='Middle/Medium EMA', color='orange', alpha=0.35)
plt.plot(nvda_adj_6mo['Long'], label='Long/Slow EMA', color='green', alpha=0.35)
plt.scatter(nvda_adj_6mo.index, nvda_adj_6mo['Buy'], color='green', label='Buy Signal', marker='^', alpha=1)
plt.scatter(nvda_adj_6mo.index, nvda_adj_6mo['Sell'], color='red', label='Sell Signal', marker='v', alpha=1)
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend()
plt.show()
# Set ticker and title
ticker = 'NVDA'
title_txt = "Trading signals for NVDA stock"
label_txt = "NVDA Adj Close"
# Call the function to plot
buy_sell_ewma3_plot()
5.1.6 Exponential Smoothing Single Exponential Smoothing, also known as Simple Exponential Smoothing, is a time series forecasting method for univariate data without a trend or seasonality. It requires an alpha parameter, also called the smoothing factor or smoothing coefficient, to control the rate at which the influence of the observations at prior time steps decay exponentially.
# Exponential smoothing function
def exponential_smoothing(series, alpha):
result = [series[0]] # first value is same as series
for n in range(1, len(series)):
result.append(alpha * series[n] + (1 - alpha) * result[n-1])
return result
# Function to plot exponential smoothing
def plot_exponential_smoothing(series, alphas):
plt.figure(figsize=(17, 8))
for alpha in alphas:
plt.plot(exponential_smoothing(series, alpha), label=f"Alpha {alpha}")
plt.plot(series.values, "c", label=f"{label_txt}")
plt.xlabel('Days', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend(loc="best")
plt.axis('tight')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.grid(True)
plt.show()
# Set ticker and title for NVDA
ticker = 'NVDA'
title_txt = "Single Exponential Smoothing for NVDA stock using 0.05 and 0.3 as alpha values"
label_txt = "NVDA Adj Close"
# Assuming nvda_sma is your dataframe containing NVDA stock data
plot_exponential_smoothing(nvda_sma['Adj Close'].loc['2024-01-01':'2024-12-31'], [0.05, 0.3])
The smaller the smoothing factor (coefficient), the smoother the time series will be. As the smoothing factor approaches 0, we approach the moving average model so the smoothing factor of 0.05 produces a smoother time series than 0.3. This indicates slow learning (past observations have a large influence on forecasts). A value close to 1 indicates fast learning (that is, only the most recent values influence the forecasts).
Double Exponential Smoothing (Holt’s Linear Trend Model) is an extension being a recursive use of Exponential Smoothing twice where beta is the trend smoothing factor, and takes values between 0 and 1. It explicitly adds support for trends.
# Double Exponential Smoothing function
def double_exponential_smoothing(series, alpha, beta):
result = [series[0]]
for n in range(1, len(series) + 1):
if n == 1:
level, trend = series[0], series[1] - series[0]
if n >= len(series): # forecasting
value = result[-1]
else:
value = series[n]
last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
trend = beta * (level - last_level) + (1 - beta) * trend
result.append(level + trend)
return result
# Function to plot Double Exponential Smoothing
def plot_double_exponential_smoothing(series, alphas, betas):
plt.figure(figsize=(17, 8))
for alpha in alphas:
for beta in betas:
plt.plot(double_exponential_smoothing(series, alpha, beta), label=f"Alpha {alpha}, Beta {beta}")
plt.plot(series.values, label=f"{label_txt}")
plt.xlabel('Days', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend(loc="best")
plt.axis('tight')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.grid(True)
plt.show()
# Set ticker and title for NVDA
ticker = 'NVDA'
title_txt = "Double Exponential Smoothing for NVDA stock with different alpha and beta values"
label_txt = "NVDA Adj Close"
# Assuming nvda_sma is your dataframe containing NVDA stock data
plot_double_exponential_smoothing(nvda_sma['Adj Close'].loc['2024-01-01':'2024-12-31'], alphas=[0.9, 0.02], betas=[0.9, 0.02])
The third main type is Triple Exponential Smoothing (Holt Winters Method) which is an extension of Exponential Smoothing that explicitly adds support for seasonality, or periodic fluctuations.
5.1.7 Moving average convergence divergence (MACD) The MACD is a trend-following momentum indicator turning two trend-following indicators, moving averages, into a momentum oscillator by subtracting the longer moving average from the shorter one.
It is useful although lacking one prediction element - because it is unbounded it is not particularly useful for identifying overbought and oversold levels. Traders can look for signal line crossovers, neutral/centreline crossovers (otherwise known as the 50 level) and divergences from the price action to generate signals.
The default parameters are 26 EMA of prices, 12 EMA of prices and a 9-moving average of the difference between the first two.
# Function to plot the adjusted close price for a 3-month period
def adj_3mo():
sns.set(rc={'figure.figsize': (15, 9)})
nvda_sma['Adj Close'].loc['2024-05-15':'2024-08-15'].plot(label=f"{label_txt}")
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend()
plt.show()
# Set title and label for NVDA
title_txt = "NVDA Adjusted Close Price from 1 Aug - 31 Oct 2024"
label_txt = "NVDA Adj Close"
# Call the function to plot
adj_3mo()
# Extracting the data for the specified period
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15']
# Calculate EMAs and MACD
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()
# Define the MACD plotting function
def macd():
plt.figure(figsize=(15, 9))
plt.plot(nvda_adj_3mo.index, MACD, label=macd_label_txt, color='red')
plt.plot(nvda_adj_3mo.index, signal, label=sig_label_txt, color='blue')
plt.title(title_txt, color='black', fontsize=20)
plt.xticks(rotation=45)
plt.xlabel('Date', color='black', fontsize=15)
plt.legend(loc='upper left')
plt.show()
# Set title and labels
title_txt = 'MACD and Signal line for NVDA stock from 15 may - 15 Aug 2024'
macd_label_txt = "NVDA MACD"
sig_label_txt = "Signal Line"
# Call the function to plot
macd()
When the MACD line crosses above the signal line this indicates a good time to buy.
# Create new columns for the MACD and Signal Line data
nvda_adj_3mo['MACD'] = MACD
nvda_adj_3mo['Signal Line'] = signal
nvda_adj_3mo
| Adj Close | MACD | Signal Line | |
|---|---|---|---|
| Date | |||
| 2024-05-15 | 94.622223 | 0.000000 | 0.000000 |
| 2024-05-16 | 94.351250 | -0.021616 | -0.004323 |
| 2024-05-17 | 92.471397 | -0.188265 | -0.041112 |
| 2024-05-20 | 94.772217 | -0.133144 | -0.059518 |
| 2024-05-21 | 95.378166 | -0.040103 | -0.055635 |
| 2024-05-22 | 94.942200 | -0.001528 | -0.044814 |
| 2024-05-23 | 103.790482 | 0.734557 | 0.111060 |
| 2024-05-24 | 106.460258 | 1.515865 | 0.392021 |
| 2024-05-28 | 113.891647 | 2.703543 | 0.854326 |
| 2024-05-29 | 114.815567 | 3.676954 | 1.418851 |
| 2024-05-30 | 110.490921 | 4.052709 | 1.945623 |
| 2024-05-31 | 109.624001 | 4.231763 | 2.402851 |
| 2024-06-03 | 114.990555 | 4.751924 | 2.872665 |
| 2024-06-04 | 116.427429 | 5.219926 | 3.342118 |
| 2024-06-05 | 122.429947 | 6.005942 | 3.874883 |
| 2024-06-06 | 120.988060 | 6.438300 | 4.387566 |
| 2024-06-07 | 120.878075 | 6.694898 | 4.849032 |
| 2024-06-10 | 121.779999 | 6.891589 | 5.257544 |
| 2024-06-11 | 120.910004 | 6.897754 | 5.585586 |
| 2024-06-12 | 125.199997 | 7.166199 | 5.901708 |
| 2024-06-13 | 129.610001 | 7.646648 | 6.250696 |
| 2024-06-14 | 131.880005 | 8.117009 | 6.623959 |
| 2024-06-17 | 130.979996 | 8.321229 | 6.963413 |
| 2024-06-18 | 135.580002 | 8.753354 | 7.321401 |
| 2024-06-20 | 130.779999 | 8.609254 | 7.578972 |
| 2024-06-21 | 126.570000 | 8.062403 | 7.675658 |
| 2024-06-24 | 118.110001 | 6.867208 | 7.513968 |
| 2024-06-25 | 126.089996 | 6.489124 | 7.308999 |
| 2024-06-26 | 126.400002 | 6.143683 | 7.075936 |
| 2024-06-27 | 123.989998 | 5.610775 | 6.782904 |
| 2024-06-28 | 123.540001 | 5.093416 | 6.445006 |
| 2024-07-01 | 124.300003 | 4.690660 | 6.094137 |
| 2024-07-02 | 122.669998 | 4.191627 | 5.713635 |
| 2024-07-03 | 128.279999 | 4.200401 | 5.410988 |
| 2024-07-05 | 125.830002 | 3.963965 | 5.121583 |
| 2024-07-08 | 128.199997 | 3.922610 | 4.881789 |
| 2024-07-09 | 131.380005 | 4.099182 | 4.725267 |
| 2024-07-10 | 134.910004 | 4.472404 | 4.674695 |
| 2024-07-11 | 127.400002 | 4.114758 | 4.562707 |
| 2024-07-12 | 129.240005 | 3.934440 | 4.437054 |
| 2024-07-15 | 128.440002 | 3.684510 | 4.286545 |
| 2024-07-16 | 126.360001 | 3.280782 | 4.085392 |
| 2024-07-17 | 117.989998 | 2.259390 | 3.720192 |
| 2024-07-18 | 121.089996 | 1.680699 | 3.312293 |
| 2024-07-19 | 117.930000 | 0.956077 | 2.841050 |
| 2024-07-22 | 123.540001 | 0.824978 | 2.437836 |
| 2024-07-23 | 122.589996 | 0.637081 | 2.077685 |
| 2024-07-24 | 114.250000 | -0.182692 | 1.625609 |
| 2024-07-25 | 112.279999 | -0.980033 | 1.104481 |
| 2024-07-26 | 113.059998 | -1.531339 | 0.577317 |
# Extracting the data for the specified period for NVDA
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15']
# Calculate EMAs and MACD
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()
# Create a DataFrame with MACD and Signal Line
macd_signal_df = pd.DataFrame({
'Adj Close': nvda_adj_3mo['Adj Close'],
'MACD': MACD,
'Signal Line': signal
})
# Function to signal when to buy and sell
def buy_sell_macd(df):
Buy = []
Sell = []
flag = -1
for i in range(len(df)):
if df['MACD'][i] > df['Signal Line'][i]:
Sell.append(np.nan)
if flag != 1:
Buy.append(df['Adj Close'][i])
flag = 1
else:
Buy.append(np.nan)
elif df['MACD'][i] < df['Signal Line'][i]:
Buy.append(np.nan)
if flag != 0:
Sell.append(df['Adj Close'][i])
flag = 0
else:
Sell.append(np.nan)
else:
Buy.append(np.nan)
Sell.append(np.nan)
return (Buy, Sell)
# Create buy and sell columns
a = buy_sell_macd(macd_signal_df)
nvda_adj_3mo['Buy_Signal_Price'] = a[0]
nvda_adj_3mo['Sell_Signal_Price'] = a[1]
# Set labels and titles
ticker = 'NVDA'
title_txt = 'MACD and Signal line for NVDA stock from 15 May - 15 Aug 2024'
macd_label_txt = "NVDA MACD"
sig_label_txt = "Signal Line"
# Function to plot MACD and Signal Line
def macd():
plt.figure(figsize=(15, 9))
plt.plot(nvda_adj_3mo.index, MACD, label=f"{macd_label_txt}", color='red')
plt.plot(nvda_adj_3mo.index, signal, label=f"{sig_label_txt}", color='blue')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xticks(rotation=45)
plt.xlabel('Date', color='black', fontsize=15)
plt.legend(loc='upper left')
plt.show()
# Call the functions to plot and signal
macd()
# Extract data for NVDA from May 15 to August 15, 2024
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15']
# Calculate EMAs and MACD
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()
# Create new columns for the MACD and Signal Line data
nvda_adj_3mo['MACD'] = MACD
nvda_adj_3mo['Signal Line'] = signal
# Function to signal when to buy and sell
def buy_sell_macd(signal):
Buy = []
Sell = []
flag = -1
for i in range(len(signal)):
if signal['MACD'][i] > signal['Signal Line'][i]:
Sell.append(np.nan)
if flag != 1:
Buy.append(signal['Adj Close'][i])
flag = 1
else:
Buy.append(np.nan)
elif signal['MACD'][i] < signal['Signal Line'][i]:
Buy.append(np.nan)
if flag != 0:
Sell.append(signal['Adj Close'][i])
flag = 0
else:
Sell.append(np.nan)
else:
Buy.append(np.nan)
Sell.append(np.nan)
return (Buy, Sell)
# Create buy and sell columns
a = buy_sell_macd(nvda_adj_3mo)
nvda_adj_3mo['Buy_Signal_Price'] = a[0]
nvda_adj_3mo['Sell_Signal_Price'] = a[1]
# Plot buy and sell signals
def buy_sell_macd_plot():
plt.figure(figsize=(20, 10))
plt.scatter(nvda_adj_3mo.index, nvda_adj_3mo['Buy_Signal_Price'], color='green', label='Buy', marker='^', alpha=1)
plt.scatter(nvda_adj_3mo.index, nvda_adj_3mo['Sell_Signal_Price'], color='red', label='Sell', marker='v', alpha=1)
plt.plot(nvda_adj_3mo['Adj Close'], label='Adj Close Price', alpha=0.35)
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Adj Close Price')
plt.legend(loc='upper left')
plt.show()
# Set labels and titles
ticker = 'NVDA'
title_txt = 'NVDA Adjusted Close Price Buy & Sell Signals'
# Call the function to plot
buy_sell_macd_plot()
5.2 Momentum Strategies In momentum algorithmic trading strategies stocks have momentum (i.e. upward or downward trends) that we can detect and exploit.
5.2.1 Relative Strength Index (RSI) The RSI is a momentum indicator. A typical momentum strategy will buy stocks that have been showing an upward trend in hopes that the trend will continue, and make predictions based on whether the past recent values were going up or going down.
The RSI determines the level of overbought (70) and oversold (30) zones using a default lookback period of 14 i.e. it uses the last 14 values to calculate its values. The idea is to buy when the RSI touches the 30 barrier and sell when it touches the 70 barrier.
# Extract data for NVDA from May 15 to August 15, 2024
nvda_adj_12mo = nvda_sma[['Adj Close']]['2024-01-01':'2024-12-31']
# Calculate the RSI
delta = nvda_adj_12mo['Adj Close'].diff(1)
up = delta.copy()
down = delta.copy()
up[up < 0] = 0
down[down > 0] = 0
period = 14
# Calculate average gain and average loss
AVG_Gain = up.rolling(window=period).mean()
AVG_Loss = down.abs().rolling(window=period).mean()
# Calculate RSI based on SMA
RS = AVG_Gain / AVG_Loss
RSI = 100.0 - (100.0 / (1.0 + RS))
# Create dataframe with Adjusted Close and RSI
new_df = pd.DataFrame()
new_df['Adj Close'] = nvda_adj_12mo['Adj Close']
new_df['RSI'] = RSI
# Function to plot Adjusted Close price
def adj_close_12mo():
sns.set(rc={'figure.figsize':(20, 10)})
plt.plot(new_df.index, new_df['Adj Close'], label='Adj Close')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Stock Price (p)', color='black', fontsize=15)
plt.legend(loc='upper left')
plt.show()
# Function to plot RSI
def rsi():
sns.set(rc={'figure.figsize':(20, 10)})
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('RSI', color='black', fontsize=15)
RSI.plot()
plt.show()
# Function to plot RSI with significant levels
def rsi_sma():
plt.figure(figsize=(20, 10))
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.plot(new_df.index, new_df['RSI'], label='RSI')
plt.axhline(0, linestyle='--', alpha=0.5, color='gray')
plt.axhline(10, linestyle='--', alpha=0.5, color='orange')
plt.axhline(20, linestyle='--', alpha=0.5, color='green')
plt.axhline(30, linestyle='--', alpha=0.5, color='red')
plt.axhline(70, linestyle='--', alpha=0.5, color='red')
plt.axhline(80, linestyle='--', alpha=0.5, color='green')
plt.axhline(90, linestyle='--', alpha=0.5, color='orange')
plt.axhline(100, linestyle='--', alpha=0.5, color='gray')
plt.xlabel('Date', color='black', fontsize=15)
plt.show()
# Set labels and titles
ticker = 'NVDA'
title_txt = 'NVDA Adjusted Close Price from 01 Jan - 31 Dec 2024'
# Call the functions to plot
adj_close_12mo()
rsi()
title_txt = 'NVDA RSI based on SMA'
rsi_sma()
# Define period for RSI calculation
period = 14
# Update the data for NVDA for the period May 15 to August 15, 2024
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-01-01':'2024-12-31']
# Calculate the daily price changes
delta = nvda_adj_3mo['Adj Close'].diff(1)
# Get positive gains (up) and negative gains (down)
up = delta.copy()
down = delta.copy()
up[up < 0] = 0
down[down > 0] = 0
# Calculate EWMA average gain and average loss
AVG_Gain2 = up.ewm(span=period).mean()
AVG_Loss2 = down.abs().ewm(span=period).mean()
# Calculate RSI based on EWMA
RS2 = AVG_Gain2 / AVG_Loss2
RSI2 = 100.0 - (100.0 / (1.0 + RS2))
# Create DataFrame for Adjusted Close and EWMA RSI
new_df2 = pd.DataFrame()
new_df2['Adj Close'] = nvda_adj_3mo['Adj Close']
new_df2['RSI2'] = RSI2
# Function to plot RSI with significant levels
def rsi_ewma():
plt.figure(figsize=(20, 10))
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('RSI', color='black', fontsize=15)
plt.plot(new_df2.index, new_df2['RSI2'], label='RSI2')
plt.axhline(0, linestyle='--', alpha=0.5, color='gray')
plt.axhline(10, linestyle='--', alpha=0.5, color='orange')
plt.axhline(20, linestyle='--', alpha=0.5, color='green')
plt.axhline(30, linestyle='--', alpha=0.5, color='red')
plt.axhline(70, linestyle='--', alpha=0.5, color='red')
plt.axhline(80, linestyle='--', alpha=0.5, color='green')
plt.axhline(90, linestyle='--', alpha=0.5, color='orange')
plt.axhline(100, linestyle='--', alpha=0.5, color='gray')
plt.legend(loc='upper left')
plt.show()
# Set title for the plot
title_txt = 'NVDA RSI based on EWMA from Jan 01 - dec 31, 2024'
# Call the function to plot
rsi_ewma()
It appears that RSI value dips below the 20 significant level in January 2024 indicating that the stock was oversold and presented a buying opportunity for an investor before a price rise.
5.2.2 Money Flow Index (MFI) Money Flow Index (MFI) is a technical oscillator, and momentum indicator, that uses price and volume data for identifying overbought or oversold signals in an asset. It can also be used to spot divergences which warn of a trend change in price. The oscillator moves between 0 and 100 and a reading of above 80 implies overbought conditions, and below 20 implies oversold conditions.
It is related to the Relative Strength Index (RSI) but incorporates volume, whereas the RSI only considers price.
# Define period for MFI calculation
period = 14
# Extract data for NVDA for the period May 15 to August 15, 2024
nvda_3mo = nvda_sma[['Close', 'High', 'Low', 'Volume']]['2024-01-01':'2024-12-31']
# Function to plot Close Price
def nvda_close_plot():
plt.figure(figsize=(20, 10))
plt.plot(nvda_3mo['Close'])
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Close Price', color='black', fontsize=15)
plt.legend([label_txt], loc='upper left')
plt.show()
# Calculate typical price
typical_price = (nvda_3mo['Close'] + nvda_3mo['High'] + nvda_3mo['Low']) / 3
# Calculate the money flow
money_flow = typical_price * nvda_3mo['Volume']
# Get all positive and negative money flows
positive_flow = []
negative_flow = []
# Loop through typical price
for i in range(1, len(typical_price)):
if typical_price[i] > typical_price[i-1]:
positive_flow.append(money_flow[i-1])
negative_flow.append(0)
elif typical_price[i] < typical_price[i-1]:
negative_flow.append(money_flow[i-1])
positive_flow.append(0)
else:
positive_flow.append(0)
negative_flow.append(0)
# Get all positive and negative money flows within the same time period
positive_mf = []
negative_mf = []
for i in range(period-1, len(positive_flow)):
positive_mf.append(sum(positive_flow[i + 1 - period : i+1]))
for i in range(period-1, len(negative_flow)):
negative_mf.append(sum(negative_flow[i + 1 - period : i+1]))
# Calculate Money Flow Index (MFI)
mfi = 100 * (np.array(positive_mf) / (np.array(positive_mf) + np.array(negative_mf)))
# Create DataFrame for MFI
df2 = pd.DataFrame()
df2['MFI'] = mfi
# Function to plot MFI
def mfi_plot():
plt.figure(figsize=(20, 10))
plt.plot(df2['MFI'], label='MFI')
plt.axhline(10, linestyle='--', color='orange')
plt.axhline(20, linestyle='--', color='blue')
plt.axhline(80, linestyle='--', color='blue')
plt.axhline(90, linestyle='--', color='orange')
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Time periods', color='black', fontsize=15)
plt.ylabel('MFI Values', color='black', fontsize=15)
plt.legend(loc='upper left')
plt.show()
# Create new DataFrame with MFI
new_mfi_df = pd.DataFrame()
new_mfi_df = nvda_3mo[period:]
new_mfi_df['MFI'] = mfi
# Function to get buy and sell signals
def get_signal(data, high, low):
buy_signal = []
sell_signal = []
for i in range(len(data['MFI'])):
if data['MFI'][i] > high:
buy_signal.append(np.nan)
sell_signal.append(data['Close'][i])
elif data['MFI'][i] < low:
buy_signal.append(data['Close'][i])
sell_signal.append(np.nan)
else:
sell_signal.append(np.nan)
buy_signal.append(np.nan)
return (buy_signal, sell_signal)
# Add new columns (Buy & Sell)
new_mfi_df['Buy'] = get_signal(new_mfi_df, 80, 20)[0]
new_mfi_df['Sell'] = get_signal(new_mfi_df, 80, 20)[1]
# Function to plot buy and sell signals
def mfi_buy_sell_plot():
plt.figure(figsize=(20, 10))
plt.plot(new_mfi_df['Close'], label='Close Price', alpha=0.5)
plt.scatter(new_mfi_df.index, new_mfi_df['Buy'], color='green', label='Buy Signal', marker='^', alpha=1)
plt.scatter(new_mfi_df.index, new_mfi_df['Sell'], color='red', label='Sell Signal', marker='v', alpha=1)
plt.title(f"{title_txt}", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Close Price', color='black', fontsize=15)
plt.legend(loc='upper left')
plt.show()
# Set title for the plots
title_txt = "NVDA MFI and Trading Signals from Jun 01 - Dec 31, 2024"
label_txt = "NVDA Close Price"
# Call functions to plot
nvda_close_plot()
mfi_plot()
mfi_buy_sell_plot()
5.2.3 Stochastic Oscillator The stochastic oscillator is a momentum indicator comparing the closing price of a security to the range of its prices over a certain period of time and is one of the best-known momentum indicators along with RSI and MACD.
The intuition is that in a market trending upward, prices will close near the high, and in a market trending downward, prices close near the low.
The stochastic oscillator is plotted within a range of zero and 100. The default parameters are an overbought zone of 80, an oversold zone of 20 and well-used lookbacks period of 14 and 5 which can be used simultaneously. The oscillator has two lines, the %K and %D, where the former measures momentum and the latter measures the moving average of the former. The %D line is more important of the two indicators and tends to produce better trading signals which are created when the %K crosses through the %D.
# Define period for the rolling windows
period = 14
# Assuming `nvda` is the DataFrame with NVDA stock data for the period May 15 to August 15, 2024
nvda_so = nvda_sma.copy()
nvda_so = nvda_so['2024-01-01':'2024-12-31']
# Create the "L14" column in the DataFrame
nvda_so['L14'] = nvda_so['Low'].rolling(window=period).min()
# Create the "H14" column in the DataFrame
nvda_so['H14'] = nvda_so['High'].rolling(window=period).max()
# Create the "%K" column in the DataFrame
nvda_so['%K'] = 100 * ((nvda_so['Close'] - nvda_so['L14']) / (nvda_so['H14'] - nvda_so['L14']))
# Create the "%D" column in the DataFrame
nvda_so['%D'] = nvda_so['%K'].rolling(window=3).mean()
# Plot Close price and Stochastic Oscillator
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(20, 10))
fig.subplots_adjust(hspace=0.5)
nvda_so['Close'].plot(ax=axes[0])
axes[0].set_title('Close Price')
axes[0].set_ylabel('Close Price')
nvda_so[['%K', '%D']].plot(ax=axes[1])
axes[1].set_title('Stochastic Oscillator')
axes[1].set_ylabel('Oscillator Value')
plt.show()
# Create a column for sell entry signal
nvda_so['Sell Entry'] = ((nvda_so['%K'] < nvda_so['%D']) & (nvda_so['%K'].shift(1) > nvda_so['%D'].shift(1))) & (nvda_so['%D'] > 80)
# Create a column for sell exit signal
nvda_so['Sell Exit'] = ((nvda_so['%K'] > nvda_so['%D']) & (nvda_so['%K'].shift(1) < nvda_so['%D'].shift(1)))
# Create a column for buy entry signal
nvda_so['Buy Entry'] = ((nvda_so['%K'] > nvda_so['%D']) & (nvda_so['%K'].shift(1) < nvda_so['%D'].shift(1))) & (nvda_so['%D'] < 20)
# Create a column for buy exit signal
nvda_so['Buy Exit'] = ((nvda_so['%K'] < nvda_so['%D']) & (nvda_so['%K'].shift(1) > nvda_so['%D'].shift(1)))
# Create a placeholder column for short positions
nvda_so['Short'] = np.nan
nvda_so.loc[nvda_so['Sell Entry'], 'Short'] = -1
nvda_so.loc[nvda_so['Sell Exit'], 'Short'] = 0
# Set initial position to flat
nvda_so['Short'].iloc[0] = 0
# Forward fill the position column
nvda_so['Short'] = nvda_so['Short'].fillna(method='ffill')
# Create a placeholder column for long positions
nvda_so['Long'] = np.nan
nvda_so.loc[nvda_so['Buy Entry'], 'Long'] = 1
nvda_so.loc[nvda_so['Buy Exit'], 'Long'] = 0
# Set initial position to flat
nvda_so['Long'].iloc[0] = 0
# Forward fill the position column
nvda_so['Long'] = nvda_so['Long'].fillna(method='ffill')
# Add Long and Short positions together to get final strategy position
nvda_so['Position'] = nvda_so['Long'] + nvda_so['Short']
# Plot the position through time
nvda_so['Position'].plot(figsize=(20, 10))
plt.title('Strategy Position')
plt.ylabel('Position')
plt.show()
# Set up a column holding the daily NVDA returns
nvda_so['Market Returns'] = nvda_so['Close'].pct_change()
# Create column for Strategy Returns
nvda_so['Strategy Returns'] = nvda_so['Market Returns'] * nvda_so['Position'].shift(1)
# Plot strategy returns versus NVDA returns
nvda_so[['Strategy Returns', 'Market Returns']].cumsum().plot(figsize=(20, 10))
plt.title('Strategy Returns versus NVDA Returns')
plt.ylabel('Cumulative Returns')
plt.show()
5.2.4 Rate of Change (ROC) --------------- Candlestick, ROC and Volume plot ---------------------------------- The ROC indicator is a pure momentum oscillator. The ROC calculation compares the current price with the price "n" periods ago e.g. when we compute the ROC of the daily price with a 9-day lag, we are simply looking at how much, in percentage, the price has gone up (or down) compared to 9 days ago. Like other momentum indicators, ROC has overbought and oversold zones that may be adjusted according to market conditions.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import mplfinance as mpf
# Assuming `nvda_sma` is the DataFrame with NVDA stock data
nvda_smanvda_roc = nvda_sma.copy()
nvda_roc_12mo = nvda_roc['2024-01-01':'2024-12-31']
# Calculate ROC
nvda_roc_12mo['ROC'] = (nvda_roc_12mo['Adj Close'] / nvda_roc_12mo['Adj Close'].shift(9) - 1) * 100
# Select data for the last 100 days of 2024
nvda_roc_100d = nvda_roc_12mo[-100:]
dates = nvda_roc_100d.index
price = nvda_roc_100d['Adj Close']
roc = nvda_roc_100d['ROC']
# Plot Price and ROC
fig, (price_ax, roc_ax) = plt.subplots(2, 1, figsize=(16, 10), sharex=True)
fig.subplots_adjust(hspace=0)
plt.rcParams.update({'font.size': 14})
# Price subplot
price_ax.plot(dates, price, color='blue', linewidth=2, label="Adj Closing Price")
price_ax.legend(loc="upper left", fontsize=12)
price_ax.set_ylabel("Price")
price_ax.set_title("NVDA Daily Price", fontsize=24)
price_ax.set_facecolor((.94, .95, .98))
# ROC subplot
roc_ax.plot(dates, roc, color='k', linewidth=1, alpha=0.7, label="9-Day ROC")
roc_ax.legend(loc="upper left", fontsize=12)
roc_ax.set_ylabel("% ROC")
roc_ax.set_facecolor((.98, .97, .93))
# Adding a horizontal line at the zero level in the ROC subplot
roc_ax.axhline(0, color=(.5, .5, .5), linestyle='--', alpha=0.5)
# Filling the areas between the indicator and the zero line
roc_ax.fill_between(dates, 0, roc, where=(roc >= 0), color='g', alpha=0.3, interpolate=True)
roc_ax.fill_between(dates, 0, roc, where=(roc < 0), color='r', alpha=0.3, interpolate=True)
# Formatting the date labels and ROC y-axis
roc_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
roc_ax.yaxis.set_major_formatter(mticker.PercentFormatter())
# Adding a grid to both subplots
price_ax.grid(True, linestyle='--', alpha=0.5)
roc_ax.grid(True, linestyle='--', alpha=0.5)
# Adding margins around the plots
price_ax.margins(0.05, 0.2)
roc_ax.margins(0.05, 0.2)
# Hiding tick marks from the horizontal and vertical axis
price_ax.tick_params(left=False, bottom=False)
roc_ax.tick_params(left=False, bottom=False, labelrotation=45)
# Hiding all the spines for the price subplot
for s in price_ax.spines.values():
s.set_visible(False)
# Hiding all the spines for the ROC subplot
for s in roc_ax.spines.values():
s.set_visible(False)
# Reinstate a spine in between the two subplots
roc_ax.spines['top'].set_visible(True)
roc_ax.spines['top'].set_linewidth(1.5)
# Candlestick and volume plot
mpf.plot(nvda_roc_100d, type='candle', style='yahoo', figsize=(15, 8), title="NVDA Daily Price", volume=True)
# Combined Candlestick and ROC plot
roc_plot = mpf.make_addplot(roc, panel=2, ylabel='ROC')
mpf.plot(nvda_roc_100d, type='candle', style='yahoo', figsize=(15, 8), addplot=roc_plot, title="NVDA Daily Price", volume=True)
5.3 Volatility trading strategies Volatility trading involves predicting the stability of an asset’s value. Instead of trading on the price rising or falling, traders take a position on whether it will move in any direction.
5.3.1 Bollinger Bands A Bollinger Band is a volatility indicator based on based on the correlation between the normal distribution and stock price and can be used to draw support and resistance curves. It is defined by a set of lines plotted two standard deviations (positively and negatively) away from a simple moving average (SMA) of the security's price, but can be adjusted to user preferences.
By default it calculates a 20-period SMA (the middle band), an upper band two standard deviations above the the moving average and a lower band two standard deviations below it.
If the price moves above the upper band this could indicate a good time to sell, and if it moves below the lower band it could be a good time to buy.
Whereas the RSI can only be used as a confirming factor inside a ranging market, not a trending market, by using Bollinger bands we can calculate the widening variable, or moving spread between the upper and the lower bands, that tells us if prices are about to trend and whether the RSI signals might not be that reliable.
Despite 90% of the price action happening between the bands, however, a breakout is not necessarily a trading signal as it provides no clue as to the direction and extent of future price movement.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Assuming `nvda_12mo` is the DataFrame with NVDA stock data
nvda_12mo_bb = nvda_roc_12mo.copy()
# Parameters
period = 20
# Calculate Bollinger Bands
nvda_12mo_bb['SMA'] = nvda_12mo_bb['Close'].rolling(window=period).mean()
nvda_12mo_bb['STD'] = nvda_12mo_bb['Close'].rolling(window=period).std()
nvda_12mo_bb['Upper'] = nvda_12mo_bb['SMA'] + (nvda_12mo_bb['STD'] * 2)
nvda_12mo_bb['Lower'] = nvda_12mo_bb['SMA'] - (nvda_12mo_bb['STD'] * 2)
# Keep relevant columns
column_list = ['Close', 'SMA', 'Upper', 'Lower']
# Plot Bollinger Bands
def bb_12mo():
nvda_12mo_bb[column_list].plot(figsize=(20,10))
plt.style.use('seaborn')
plt.title('Bollinger Band for NVDA', color='black', fontsize=20)
plt.ylabel('Close Price', color='black', fontsize=15)
plt.show()
bb_12mo()
# Plot Bollinger Bands with shading
def bb_shaded():
fig, ax = plt.subplots(figsize=(20, 10))
x_axis = nvda_12mo_bb.index
ax.fill_between(x_axis, nvda_12mo_bb['Upper'], nvda_12mo_bb['Lower'], color='grey')
ax.plot(x_axis, nvda_12mo_bb['Close'], color='gold', lw=3, label='Close Price')
ax.plot(x_axis, nvda_12mo_bb['SMA'], color='blue', lw=3, label='Simple Moving Average')
ax.set_title('Bollinger Band For NVDA', color='black', fontsize=20)
ax.set_xlabel('Date', color='black', fontsize=15)
ax.set_ylabel('Close Price', color='black', fontsize=15)
plt.xticks(rotation=45)
ax.legend()
plt.show()
bb_shaded()
# Prepare new DataFrame for signals
new_nvda_12mo_bb = nvda_12mo_bb[period-1:]
# Function to get buy and sell signals
def get_signal_bb(data):
buy_signal = []
sell_signal = []
for i in range(len(data['Close'])):
if data['Close'][i] > data['Upper'][i]:
buy_signal.append(np.nan)
sell_signal.append(data['Close'][i])
elif data['Close'][i] < data['Lower'][i]:
sell_signal.append(np.nan)
buy_signal.append(data['Close'][i])
else:
buy_signal.append(np.nan)
sell_signal.append(np.nan)
return buy_signal, sell_signal
# Add buy and sell signals to DataFrame
new_nvda_12mo_bb['Buy'] = get_signal_bb(new_nvda_12mo_bb)[0]
new_nvda_12mo_bb['Sell'] = get_signal_bb(new_nvda_12mo_bb)[1]
# Plot all data with signals
def bb_alldata():
fig, ax = plt.subplots(figsize=(20,10))
x_axis = new_nvda_12mo_bb.index
ax.fill_between(x_axis, new_nvda_12mo_bb['Upper'], new_nvda_12mo_bb['Lower'], color='grey')
ax.plot(x_axis, new_nvda_12mo_bb['Close'], color='gold', lw=3, label='Close Price', alpha=0.5)
ax.plot(x_axis, new_nvda_12mo_bb['SMA'], color='blue', lw=3, label='Moving Average', alpha=0.5)
ax.scatter(x_axis, new_nvda_12mo_bb['Buy'], color='green', lw=3, label='Buy', marker='^', alpha=1)
ax.scatter(x_axis, new_nvda_12mo_bb['Sell'], color='red', lw=3, label='Sell', marker='v', alpha=1)
ax.set_title('Bollinger Band, Close Price, MA and Trading Signals for NVDA', color='black', fontsize=20)
ax.set_xlabel('Date', color='black', fontsize=15)
ax.set_ylabel('Close Price', color='black', fontsize=15)
plt.xticks(rotation=45)
ax.legend()
plt.show()
bb_alldata()
The Bollinger Bands technical indicator is an example of a mean reversion strategy.
5.3.2 Mean reversion strategies In mean reversion algorithmic trading strategies stocks return to their mean and we can exploit when it deviates from that mean.
These strategies usually involve selling into up moves and buying into down moves, a contrarian approach which assumes that the market has become oversold/overbought and prices will revert to their historical trends. This is almost the opposite of trend following where we enter in the direction of the strength and momentum, and momentum strategies such as buying stocks that have been showing an upward trend in hopes that the trend will continue, a continuation approach.
It is almost certainly better to choose technical indicators that complement each other, not just those that move in unison and generate the same signals. The intuition here is that the more indicators you have that confirm each other, the better your chances are to profit. This can be done by combining strategies to form a system, and looking for multiple signals.